# Mirror of https://github.com/sd-webui/stable-diffusion-webui.git
# (synced 2024-12-15 07:12:58 +03:00; 1688 lines, 70 KiB, Python)
|
|
||
|
import argparse, os, sys, glob, re, time
|
||
|
import collections
|
||
|
import yaml
|
||
|
import math
|
||
|
import random
|
||
|
from typing import List, Union, Dict, Callable, Any, Optional, Type, Tuple
|
||
|
|
||
|
import numba
|
||
|
|
||
|
import numpy as np
|
||
|
import cv2
|
||
|
from PIL import Image, ImageFont, ImageDraw, ImageFilter, ImageOps, ImageChops, ImageColor
|
||
|
|
||
|
import torch
|
||
|
|
||
|
from frontend.job_manager import JobInfo
|
||
|
|
||
|
# Module-level cache shared by all scn2img calls: "seed" is the seed the
# cached entries were produced for, "cache" is the dict of cached entries.
scn2img_cache = dict(seed=None, cache={})

# Loaded keras depth model; stays None until
# try_loading_monocular_depth_estimation() assigns it.
monocular_depth_estimation = None
|
||
|
def try_loading_monocular_depth_estimation(monocular_depth_estimation_dir = "./src/monocular-depth-estimation/"):
    """Try to load the keras monocular depth estimation model from disk.

    On success the model is stored in the module-level
    ``monocular_depth_estimation``. Every failure (including a missing
    directory) is only reported by printing; nothing is raised from the
    guarded sections.

    Args:
        monocular_depth_estimation_dir: directory containing the saved model.
    """
    global monocular_depth_estimation

    if not os.path.exists(monocular_depth_estimation_dir):
        print(f"monocular_depth_estimation not found at path, please make sure you have cloned \n the repository https://huggingface.co/keras-io/monocular-depth-estimation to {monocular_depth_estimation_dir}")
        return

    # tensorflow is imported lazily so the module works without it installed
    import tensorflow as tf

    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        # Cap TensorFlow at 1GB of memory on the first GPU
        try:
            tf.config.experimental.set_virtual_device_configuration(
                gpus[0],
                [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)],
            )
        except Exception:
            import traceback
            print("Exception during tf.config.experimental.set_virtual_device_configuration:", file=sys.stderr)
            print(traceback.format_exc(), file=sys.stderr)

    try:
        from tensorflow.keras.layers import Layer, InputSpec
        import tensorflow.keras
        # Custom layer adapted from https://stackoverflow.com/a/63631510/798588
        from tensorflow.python.keras.utils import conv_utils

        def normalize_data_format(value):
            """Return the lower-cased data format, defaulting to the keras backend setting."""
            chosen = tensorflow.keras.backend.image_data_format() if value is None else value
            lowered = chosen.lower()
            if lowered in ('channels_first', 'channels_last'):
                return lowered
            raise ValueError('The `data_format` argument must be one of '
                             '"channels_first", "channels_last". Received: ' +
                             str(chosen))

        class BilinearUpSampling2D(Layer):
            """Layer that resizes its 4D input by ``size`` using bilinear interpolation."""

            def __init__(self, size=(2, 2), data_format=None, **kwargs):
                super().__init__(**kwargs)
                self.data_format = normalize_data_format(data_format)
                self.size = conv_utils.normalize_tuple(size, 2, 'size')
                self.input_spec = InputSpec(ndim=4)

            def compute_output_shape(self, input_shape):
                """Scale the two spatial dimensions; unknown (None) dimensions stay None."""
                if self.data_format == 'channels_first':
                    rows, cols = input_shape[2], input_shape[3]
                    height = None if rows is None else self.size[0] * rows
                    width = None if cols is None else self.size[1] * cols
                    return (input_shape[0], input_shape[1], height, width)
                elif self.data_format == 'channels_last':
                    rows, cols = input_shape[1], input_shape[2]
                    height = None if rows is None else self.size[0] * rows
                    width = None if cols is None else self.size[1] * cols
                    return (input_shape[0], height, width, input_shape[3])

            def call(self, inputs):
                """Resize ``inputs`` to the scaled height/width with tf.image.resize."""
                input_shape = tensorflow.keras.backend.shape(inputs)
                if self.data_format == 'channels_first':
                    rows, cols = input_shape[2], input_shape[3]
                elif self.data_format == 'channels_last':
                    rows, cols = input_shape[1], input_shape[2]
                height = None if rows is None else self.size[0] * rows
                width = None if cols is None else self.size[1] * cols
                return tf.image.resize(inputs, [height, width], method=tf.image.ResizeMethod.BILINEAR)

            def get_config(self):
                """Extend the base layer config with this layer's own settings."""
                own = {'size': self.size, 'data_format': self.data_format}
                inherited = super().get_config()
                return {**inherited, **own}

        # the saved model references these names; the loss is not needed for inference
        custom_objects = {'BilinearUpSampling2D': BilinearUpSampling2D, 'depth_loss_function': None}
        monocular_depth_estimation = tf.keras.models.load_model(
            monocular_depth_estimation_dir,
            custom_objects=custom_objects,
            compile=False,
        )
        # todo: load model from pretrained keras into user .cache folder like transformers lib is doing it,
        # e.g. huggingface_hub.from_pretrained_keras("keras-io/monocular-depth-estimation",
        #      custom_objects=custom_objects, compile=False)
        print('monocular_depth_estimation loaded')
    except Exception:
        import traceback
        print("Error loading monocular_depth_estimation:", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
|
||
|
|
||
|
# MiDaS state, all None until try_loading_midas_depth_estimation() fills it:
# the model, the transforms object from torch.hub, and the selected transform.
midas_depth_estimation = midas_transforms = midas_transform = None
|
||
|
def try_loading_midas_depth_estimation(use_large_model = True):
    """Load a MiDaS model and its input transform from torch.hub into module globals.

    Args:
        use_large_model: True selects the "MiDaS" entry point with
            ``default_transform``; False selects "MiDaS_small" with
            ``small_transform``.

    Errors are printed to stderr with a traceback and are not re-raised.
    """
    global midas_depth_estimation, midas_transforms, midas_transform
    try:
        model_name = "MiDaS" if use_large_model else "MiDaS_small"
        midas_depth_estimation = torch.hub.load("intel-isl/MiDaS", model_name)

        # the model is placed on the CPU
        midas_depth_estimation.to("cpu")

        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
        midas_transform = (
            midas_transforms.default_transform
            if use_large_model
            else midas_transforms.small_transform
        )
    except Exception:
        import traceback
        print("Error loading midas_depth_estimation:", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
|
||
|
|
||
|
def try_many(fs, *args, **kwargs):
    """Return the result of the first entry of ``fs`` that succeeds.

    Each entry is called as ``f(*args, **kwargs)`` in order. An entry that
    raises an ordinary exception (this includes entries that are not
    callable at all) is skipped and the next one is tried.

    Args:
        fs: iterable of callables to try in order.
        *args, **kwargs: forwarded unchanged to every callable.

    Returns:
        Whatever the first successful callable returned.

    Raises:
        Exception: if ``fs`` is empty or every entry failed; the last
            failure is chained as ``__cause__``.
    """
    last_error = None
    attempts = 0
    for f in fs:
        attempts += 1
        try:
            return f(*args, **kwargs)
        except Exception as error:
            # Only ordinary errors mean "try the next candidate";
            # KeyboardInterrupt / SystemExit must still propagate.
            last_error = error
    raise Exception(
        f"try_many: none of the {attempts} candidate(s) succeeded "
        f"for args={args!r}, kwargs={kwargs!r}"
    ) from last_error
|
||
|
|
||
|
def scn2img_define_args():
    """Build the attribute-parsing tables used by the scn2img scene parser.

    Returns:
        A tuple ``(parse_arg, function_args, function_args_ext)``:

        * ``parse_arg`` maps a type name ("int", "bool", "int_tuple", ...)
          to a callable converting the attribute text to a Python value.
        * ``function_args`` maps a function / group name to a dict of
          ``attribute name -> type name``.
        * ``function_args_ext`` maps a scene function ("image", "img2img",
          "txt2img") to the ordered list of groups whose attributes it
          accepts.
    """
    true_words = {"true", "1", "yes", "on"}

    parse_arg = {}
    parse_arg["str"] = str
    parse_arg["int"] = int
    parse_arg["float"] = float
    # Case-insensitive; anything that is not a known "true" word is False.
    parse_arg["bool"] = lambda s: str(s).strip().lower() in true_words
    # NOTE: the original had a trailing comma here, which stored a 1-tuple
    # containing the lambda instead of the lambda itself.
    parse_arg["tuple"] = lambda s: tuple(s.split(","))
    parse_arg["int_tuple"] = lambda s: tuple(map(int, s.split(",")))
    parse_arg["float_tuple"] = lambda s: tuple(map(float, s.split(",")))
    parse_arg["degrees"] = lambda s: float(s) * math.pi / 180
    parse_arg["color"] = lambda s: try_many([parse_arg["int_tuple"], parse_arg["str"]], s)
    # "tuple" is deliberately not part of this chain: it accepts every
    # string, so it would turn plain text values into 1-tuples. The broken
    # (non-callable) entry was always skipped before, so leaving it out
    # keeps the results of "anything" exactly as they were.
    parse_arg["anything"] = lambda s: try_many([
        parse_arg["int_tuple"],
        parse_arg["float_tuple"],
        parse_arg["int"],
        parse_arg["float"],
        parse_arg["color"],
        parse_arg["str"],
    ], s)

    function_args = {
        "img2img": {
            "prompt"               : "str",
            "image_editor_mode"    : "str",
            "mask_mode"            : "int",
            "mask_blur_strength"   : "float",
            "mask_restore"         : "bool",
            "ddim_steps"           : "int",
            "sampler_name"         : "str",
            "toggles"              : "int_tuple",
            "realesrgan_model_name": "str",
            "n_iter"               : "int",
            "cfg_scale"            : "float",
            "denoising_strength"   : "float",
            "seed"                 : "int",
            "height"               : "int",
            "width"                : "int",
            "resize_mode"          : "int",
        },
        "txt2img": {
            "prompt"                : "str",
            "ddim_steps"            : "int",
            "sampler_name"          : "str",
            "toggles"               : "int_tuple",
            "realesrgan_model_name" : "str",
            "ddim_eta"              : "float",
            "n_iter"                : "int",
            "batch_size"            : "int",
            "cfg_scale"             : "float",
            "seed"                  : "int",
            "height"                : "int",
            "width"                 : "int",
            "variant_amount"        : "float",
            "variant_seed"          : "int",
        },
        "render_img2img": {
            "select"   : "int",
            "variation": "int",
        },
        "render_txt2img": {
            "select"   : "int",
            "variation": "int",
        },
        "image": {
            "size"     : "int_tuple",
            "crop"     : "int_tuple",
            "position" : "float_tuple",
            "resize"   : "int_tuple",
            "rotation" : "degrees",
            "color"    : "color",
            "blend"    : "str",
        },
        "render_mask": {
            "mask_value"              : "int",
            "mask_by_color"           : "color",
            "mask_by_color_space"     : "str",
            "mask_by_color_threshold" : "int",
            "mask_by_color_at"        : "int_tuple",
            "mask_is_depth"           : "bool",
            "mask_depth"              : "bool",
            "mask_depth_normalize"    : "bool",
            "mask_depth_model"        : "int",
            "mask_depth_min"          : "float",
            "mask_depth_max"          : "float",
            "mask_depth_invert"       : "bool",
            "mask_open"               : "int",
            "mask_close"              : "int",
            "mask_blur"               : "float",
            "mask_grow"               : "int",
            "mask_shrink"             : "int",
            "mask_invert"             : "bool",
        },
        "render_3d": {
            "transform3d"                      : "bool",
            "transform3d_depth_model"          : "int",
            "transform3d_depth_near"           : "float",
            "transform3d_depth_scale"          : "float",
            "transform3d_from_hfov"            : "degrees",
            "transform3d_from_pose"            : "float_tuple",
            "transform3d_to_hfov"              : "degrees",
            "transform3d_to_pose"              : "float_tuple",
            "transform3d_min_mask"             : "int",
            "transform3d_max_mask"             : "int",
            "transform3d_mask_invert"          : "bool",
            "transform3d_inpaint"              : "bool",
            "transform3d_inpaint_radius"       : "int",
            "transform3d_inpaint_method"       : "int",
            "transform3d_inpaint_restore_mask" : "bool",
        },
        "object": {
            "initial_seed": "int",
        },
    }
    function_args_ext = {
        "image": ["object", "image", "render_mask", "render_3d"],
        "img2img": ["object", "render_img2img", "img2img", "image", "render_mask", "render_3d"],
        "txt2img": ["object", "render_txt2img", "txt2img", "image", "render_mask", "render_3d"],
    }
    return parse_arg, function_args, function_args_ext
|
||
|
|
||
|
def get_scn2img(MemUsageMonitor:Type, save_sample:Callable, get_next_sequence_number:Callable, seed_to_int:Callable, txt2img: Callable, txt2img_defaults: Dict, img2img: Callable, img2img_defaults: Dict, opt: argparse.Namespace = None):
|
||
|
opt = opt or argparse.Namespace()
|
||
|
|
||
|
def next_seed(s):
    """Return the seed that follows ``s``.

    ``s`` is normalised with ``seed_to_int`` and used to seed a private
    ``random.Random``; its first draw from [0, 2**32 - 1] is the result.
    """
    generator = random.Random(seed_to_int(s))
    return generator.randint(0, 2**32 - 1)
|
||
|
|
||
|
def scn2img(prompt: str, toggles: List[int], seed: Union[int, str, None], fp = None, job_info: JobInfo = None):
|
||
|
global scn2img_cache
|
||
|
outpath = opt.outdir_scn2img or opt.outdir or "outputs/scn2img-samples"
|
||
|
err = False
|
||
|
seed = seed_to_int(seed)
|
||
|
|
||
|
prompt = prompt or ''
|
||
|
clear_cache = 0 in toggles
|
||
|
output_intermediates = 1 in toggles
|
||
|
skip_save = 2 not in toggles
|
||
|
write_info_files = 3 in toggles
|
||
|
write_sample_info_to_log_file = 4 in toggles
|
||
|
jpg_sample = 5 in toggles
|
||
|
|
||
|
os.makedirs(outpath, exist_ok=True)
|
||
|
|
||
|
if clear_cache or scn2img_cache["seed"] != seed:
|
||
|
scn2img_cache["seed"] = seed
|
||
|
scn2img_cache["cache"] = {}
|
||
|
|
||
|
comments = []
|
||
|
print_log_lvl = 2
|
||
|
def gen_log_lines(*args, **kwargs):
    """Yield the log lines for one message.

    The first line is all positional arguments joined by single spaces
    (an empty string when there are none); it is followed by one
    ``key = value`` line per keyword argument.
    """
    yield " ".join(str(part) for part in args)
    yield from (f"{name} = {value}" for name, value in kwargs.items())
|
||
|
def log(*args, **kwargs):
    """Append every line produced by gen_log_lines() to the ``comments`` list."""
    comments.extend(gen_log_lines(*args, **kwargs))
|
||
|
def log_lvl(lvl, *args, **kwargs):
    """Record a message and print it when ``lvl`` does not exceed ``print_log_lvl``.

    The message is always stored via log(); printing is the only part
    that depends on the level.
    """
    if print_log_lvl >= lvl:
        print("\n".join(gen_log_lines(*args, **kwargs)))
    log(*args, **kwargs)
|
||
|
# Level-specific wrappers around log_lvl(): a lower level number is more
# important (exception = 0 ... trace = 5).

def log_trace(*args, **kwargs):
    """Log at level 5 with a "[TRACE]" prefix."""
    log_lvl(5, "[TRACE]", *args, **kwargs)

def log_debug(*args, **kwargs):
    """Log at level 4 with a "[DEBUG]" prefix."""
    log_lvl(4, "[DEBUG]", *args, **kwargs)

def log_info(*args, **kwargs):
    """Log at level 3 with an "[INFO]" prefix."""
    log_lvl(3, "[INFO]", *args, **kwargs)

def log_warn(*args, **kwargs):
    """Log at level 2 with a "[WARN]" prefix."""
    log_lvl(2, "[WARN]", *args, **kwargs)

def log_err(*args, **kwargs):
    """Log at level 1 with an "[ERROR]" prefix."""
    log_lvl(1, "[ERROR]", *args, **kwargs)

def log_exception(*args, **kwargs):
    """Log at level 0 with an "[EXCEPTION]" prefix, then log the current traceback."""
    import traceback
    log_lvl(0, "[EXCEPTION]", *args, **kwargs)
    log_lvl(0, traceback.format_exc())
|
||
|
|
||
|
# cache = scn2img_cache["cache"]
|
||
|
log_info("scn2img_cache")
|
||
|
log_info(list(scn2img_cache["cache"].keys()))
|
||
|
|
||
|
def gen_seeds(seed):
    """Endless generator of seeds: ``seed`` itself first, then next_seed() of the previous value."""
    current = seed
    while True:
        yield current
        current = next_seed(current)
|
||
|
|
||
|
def is_seed_invalid(s):
    """Return True unless ``s`` is exactly of type ``int``.

    Strings, None, floats and bools (whose type is ``bool``, not ``int``)
    are all reported as invalid.
    """
    if type(s) is not int:
        return True
    # kept from the original check; cannot be true for an int
    return s == "" or s is None
|
||
|
|
||
|
def is_seed_valid(s):
    """Return the logical negation of is_seed_invalid(s)."""
    return not is_seed_invalid(s)
|
||
|
|
||
|
def vary_seed(s, v):
    """Return seed ``s`` varied by ``v``.

    Both arguments are converted with ``int``. A variation of 0 returns
    the seed unchanged; otherwise the result is ``next_seed(s + v)``.
    """
    base, variation = int(s), int(v)
    if variation != 0:
        return next_seed(base + variation)
    return base
|
||
|
|
||
|
if job_info:
|
||
|
output_images = job_info.images
|
||
|
else:
|
||
|
output_images = []
|
||
|
|
||
|
class SceneObject:
    """Node of a parsed scene tree.

    Attributes:
        func: name of the function that renders this node.
        title: section title (may be empty).
        args: ordered mapping of attribute name -> value.
        depth: nesting depth of the node.
        children: list of child SceneObject nodes.

    Indexing is dual-purpose: ``int`` keys address children, every other
    key is converted with ``str`` and addresses ``args``.
    """

    def __init__(self, func, title, args, depth, children):
        self.func = func
        self.title = title
        self.args = args or collections.OrderedDict()
        self.depth = depth
        self.children = children or []

    def __len__(self):
        """Number of children."""
        return len(self.children)

    def __iter__(self):
        """Iterate over the children."""
        return iter(self.children)

    def __getitem__(self, key):
        """Return child ``key`` for ints, else the attribute, or None if it is not set."""
        if type(key) is int:
            return self.children[key]
        return self.args.get(str(key))

    def __setitem__(self, key, value):
        """Replace child ``key`` for ints, else set the attribute."""
        if type(key) is int:
            self.children[key] = value
        else:
            self.args[str(key)] = value

    def __contains__(self, key):
        """Return True if ``self[key]`` addresses an existing child / attribute."""
        if type(key) is int:
            # Accept exactly the indices __getitem__ can serve. The original
            # upper-bound-only check reported every negative index as present.
            count = len(self.children)
            return -count <= key < count
        return str(key) in self.args

    def __str__(self):
        return repr(self)

    def __repr__(self):
        """Render as ``func(title = ..., <args>, children = [...])``, omitting empty parts."""
        shown = collections.OrderedDict()
        if len(self.title) > 0:
            shown["title"] = self.title
        shown.update(self.args)
        if len(self.children) > 0:
            shown["children"] = self.children
        rendered = ", ".join(f"{str(k)} = {repr(v)}" for k, v in shown.items())
        return f"{self.func}({rendered})"

    def cache_hash(self, seed=None, exclude_args=None, exclude_child_args=None, extra=None, child_extra=None):
        """Hash this node and its whole subtree for use as a cache key.

        Args:
            seed: hashed into the result, also passed to all children.
            exclude_args: attribute names of this node left out of the hash.
            exclude_child_args: attribute names left out for all descendants.
            extra: extra hashable value mixed into this node's hash.
            child_extra: extra hashable value mixed into every descendant's hash.

        The attribute stored under the key ``None`` is always excluded.
        All remaining attribute values must be hashable.
        """
        skipped = set(exclude_args or ())
        skipped.add(None)
        child_skipped = set(exclude_child_args or ())

        own_args = tuple((k, v) for k, v in self.args.items() if k not in skipped)
        child_hashes = tuple(
            child.cache_hash(
                seed=seed,
                exclude_args=child_skipped,
                exclude_child_args=child_skipped,
                extra=child_extra,
                child_extra=child_extra,
            )
            for child in self.children
        )
        return hash((
            hash(seed),
            hash(extra),
            hash(self.func),
            hash(own_args),
            hash(child_hashes),
        ))
|
||
|
|
||
|
|
||
|
|
||
|
parse_arg, function_args, function_args_ext = scn2img_define_args()
|
||
|
# log_debug("function_args", function_args)
|
||
|
|
||
|
def parse_scene(prompt, log):
    """Parse a scn2img prompt into a tree of SceneObject nodes.

    The prompt is processed in these steps: comments (``/* ... */`` and
    ``// ...``) are stripped, the text is split into sections at ``#``
    headings, the sections are nested by heading depth, the section tree is
    converted to SceneObject nodes, and finally every attribute value is
    converted according to ``function_args`` / ``parse_arg``.

    Args:
        prompt: the scene description text.
        log: list that receives warning / error messages (strings).

    Returns:
        The root SceneObject (func "scn2img"); it has one "scene" child
        per root section tree.
    """
    parse_inline_comment = re.compile(r'(?m)//.+?$')  # (?m): $ also matches right before \n
    parse_multiline_comment = re.compile(r'(?s)(^|[^/])/\*.+?\*/')  # (?s): . matches \n
    parse_attr = re.compile(r'^\s*([\w_][\d\w_]*)\s*[:=\s]\s*(.+)\s*$')
    # groups: 1 = run of '#', 2 = optional '<', 3 = optional '>', 4 = title text
    parse_heading = re.compile(r'^\s*(#+)([<]?)([>]?)\s*(.*)$')

    class Section:
        """One heading of the prompt with its raw lines, parsed attributes and sub-sections."""

        def __init__(self, depth=0, title="", content=None, children=None):
            self.depth = depth
            self.title = title
            self.lines = []
            self.content = content or collections.OrderedDict()
            self.children = children or []
            self.func = None

        def __repr__(self):
            return str(self)

        def __str__(self):
            return "\n".join(
                [("#"*self.depth) + " " + self.title]
                + [f"func={self.func}"]
                + [f"{k}={v}" for k,v in self.content.items()]
                + list(map(str, self.children))
            )

    def substitute_until_stable(pattern, replacement, txt):
        """Apply ``pattern.subn`` repeatedly until a pass replaces nothing."""
        while True:
            txt, replaced = pattern.subn(replacement, txt)
            if replaced == 0:
                return txt

    def strip_inline_comments(txt):
        """Remove ``// ...`` comments up to the end of their line."""
        return substitute_until_stable(parse_inline_comment, "", txt)

    def strip_multiline_comments(txt):
        """Remove ``/* ... */`` comments, keeping the character in front of them."""
        # The replacement must be a raw string: "\1" without the r-prefix is
        # the control character chr(1), not a reference to group 1.
        return substitute_until_stable(parse_multiline_comment, r"\1", txt)

    def strip_comments(txt):
        """Strip multi-line comments first, then inline comments."""
        return strip_inline_comments(strip_multiline_comments(txt))

    def parse_content(lines):
        """Turn the raw lines of a section into an ordered attribute mapping.

        Lines matching ``name: value`` (also ``=`` or whitespace as the
        separator) become attributes; all other lines are collected, in
        order, in a list stored under the key ``None``. A repeated
        attribute keeps its first value and a warning is appended to ``log``.
        """
        content = collections.OrderedDict()
        for line in lines:
            m = parse_attr.match(line)
            if m is None:
                content.setdefault(None, []).append(line)
                continue
            attr, value = m.group(1), m.group(2)
            if attr not in content:
                content[attr] = value
            else:
                log.append(f"Warn: value for attr {attr} already exists. ignoring {line}.")
        return content

    def parse_sections(lines):
        """Yield Section objects in prompt order, starting with the text before the first heading."""
        current_section = Section()
        bump_depth = 0
        for line in lines:
            m = parse_heading.match(line)
            if m is None:
                current_section.lines.append(line)
                continue
            current_section.content = parse_content(current_section.lines)
            yield current_section
            current_section = Section(
                depth = len(m.group(1)) + bump_depth,
                # group 4 is the heading text (group 3 is only the '>' marker)
                title = m.group(4)
            )
            # sections after this will have their depth bumped by number matched '>'.
            # this allows deep trees while avoiding growing number of '#' by
            # just using '#> example title' headings
            bump_depth -= len(m.group(2))
            bump_depth += len(m.group(3))

        current_section.content = parse_content(current_section.lines)
        yield current_section

    def to_trees(sections):
        """Nest the flat section sequence by depth and return the depth-0 roots.

        Missing levels (e.g. a depth-3 section right after a depth-1 one)
        are filled with empty intermediate sections.
        """
        stack = []  # path from the current root to the most recent section
        roots = []

        def insert_section(section):
            assert(len(stack) == section.depth)
            if section.depth == 0:
                roots.append(section)
            if len(stack) > 0:
                stack[-1].children.append(section)
            stack.append(section)

        for section in sections:
            last_depth = len(stack)-1
            if section.depth > last_depth:
                # child of the last section: create any skipped levels
                for d in range(last_depth+1, section.depth, 1):
                    insert_section(Section(depth = d))
            else:
                # sibling of the last section or of one of its ancestors
                stack = stack[:section.depth]
            insert_section(section)
        return roots

    def to_scene(trees, depth=0):
        """Convert section trees to SceneObject nodes.

        At depth 0 ``trees`` is the list of roots; deeper calls receive a
        single Section. The render function of a node is "img2img" when it
        has a prompt plus an input image (children or a color), "txt2img"
        when it only has a prompt, and "image" otherwise.
        """
        if depth == 0:
            return SceneObject(
                func="scn2img",
                title="",
                args=None,
                depth=depth,
                children=[
                    SceneObject(
                        func="scene",
                        title="",
                        args=None,
                        depth=depth+1,
                        children=[to_scene(tree, depth+2)]
                    )
                    for tree in trees
                ]
            )

        assert(type(trees) == Section)
        section = trees
        has_prompt = "prompt" in section.content
        has_input_img = len(section.children) > 0 or "color" in section.content
        if has_prompt and has_input_img:
            func = "img2img"
        elif has_prompt:
            func = "txt2img"
        else:
            func = "image"
        return SceneObject(
            func=func,
            title=section.title,
            args=section.content,
            depth=depth,
            children=[
                to_scene(child, depth+1)
                for child in section.children
            ]
        )

    def parse_scene_args(scene):
        """Convert the attribute values of ``scene`` and all descendants in place.

        The type of an attribute comes from the first group in
        ``function_args_ext[scene.func]`` that declares it; undeclared
        attributes use the "anything" parser. A failed conversion leaves the
        value unchanged and appends a message to ``log``.
        """
        extends = function_args_ext[scene.func] if scene.func in function_args_ext else []
        for arg in scene.args.keys():
            arg_type = "anything"
            for ext in extends:
                if arg in function_args[ext]:
                    arg_type = function_args[ext][arg]
                    break
            try:
                scene.args[arg] = parse_arg[arg_type](scene.args[arg])
            except Exception as e:
                value = scene.args[arg]
                msg = f"Attribute parsing failed. Expected {arg_type}, got '{value}'."
                log.append(f"{msg}. Exception: '{str(e)}'")
        for child in scene.children:
            parse_scene_args(child)
        return scene

    prompt = strip_comments(prompt)
    sections = list(parse_sections(prompt.split("\n")))
    scene = to_scene(to_trees(sections))
    parse_scene_args(scene)

    return scene
|
||
|
|
||
|
def save_sample_scn2img(img, obj):
    """Save ``img`` into ``outpath`` and optionally write its metadata.

    Does nothing for ``img is None``. The file name is
    ``<sequence number, 5 digits>-<seed>_result``. Depending on the
    ``write_info_files`` / ``write_sample_info_to_log_file`` toggles the
    prompt, the string form of ``obj`` and the seed are written to a
    per-image ``.yaml`` file and / or appended to ``log.yaml``.
    """
    if img is None:
        return

    base_count = get_next_sequence_number(outpath)
    filename = (f"{base_count:05}-" + "[SEED]_result").replace("[SEED]", str(seed))
    save_sample(img, outpath, filename, jpg_sample, None, None, None, None, None, False, None, None, None, None, None, None, None, None, None, False)

    if not (write_info_files or write_sample_info_to_log_file):
        return

    info_dict = {
        "prompt": prompt,
        "scene_object": str(obj),
        "seed": seed
    }
    if write_info_files:
        info_path = os.path.join(outpath, filename)
        with open(f"{info_path}.yaml", "w", encoding="utf8") as f:
            yaml.dump(info_dict, f, allow_unicode=True, width=10000)
    if write_sample_info_to_log_file:
        sample_log_path = os.path.join(outpath, "log.yaml")
        with open(sample_log_path, "a", encoding="utf8") as log_file:
            yaml.dump(info_dict, log_file, allow_unicode=True, width=10000)
            # separator between appended documents
            log_file.write(" \n")
|
||
|
|
||
|
|
||
|
def render_scene(output_images, scene, seeds):
|
||
|
def pose(pos, rotation, center):
    """Return the components of a 2D pose as ``(x, y, cs, sn, cx, cy)``.

    Args:
        pos: ``(x, y)`` translation; a falsy value means ``(0, 0)``.
        rotation: angle in radians.
        center: ``(cx, cy)`` pivot; a falsy value means ``(0, 0)``.

    The original body returned undefined names (``x, y, cy, c``) and raised
    NameError on every call; the defaults used here mirror pose_mat3().
    """
    x, y = pos or (0, 0)
    cx, cy = center or (0, 0)
    cs, sn = math.cos(rotation), math.sin(rotation)
    return x, y, cs, sn, cx, cy
|
||
|
|
||
|
def pose_mat3(pos=(0,0), rotation=0, center=(0,0)):
    """Build the 3x3 matrix ``T @ R`` for a 2D pose.

    ``T`` is the translation by ``pos``. ``R`` holds the rotation by
    ``rotation`` (radians) in its upper-left 2x2 part and ``-center`` in
    its last column. Falsy ``pos`` / ``center`` are treated as ``(0, 0)``.
    """
    x, y = pos or (0,0)
    cs = math.cos(rotation)
    sn = math.sin(rotation)
    cx, cy = center or (0,0)

    # coordinates in parent coordinates
    translate = np.array([
        [1, 0, x],
        [0, 1, y],
        [0, 0, 1],
    ])
    # rotated coordinates with center in origin
    rotate = np.array([
        [cs, -sn, -cx],
        [+sn, cs, -cy],
        [0, 0, 1],
    ])
    return translate @ rotate
|
||
|
|
||
|
def get_rect(img):
    """Return the four corners of ``img`` as a (4, 2) array: TL, BL, BR, TR."""
    width, height = img.size
    corners = [
        (0, 0),           # TL
        (0, height),      # BL
        (width, height),  # BR
        (width, 0),       # TR
    ]
    return np.array(corners)
|
||
|
|
||
|
def transform_points(mat3, pts):
    """Apply the 2D transform stored in ``mat3`` to row-vector points.

    The upper-left 2x2 block of ``mat3`` is applied to every row of
    ``pts`` and the first two entries of its last column are added.
    """
    linear = mat3[:2, :2]
    offset = mat3[:2, 2]
    # row-vector form of (linear @ pts.T).T + offset
    return pts @ linear.T + offset
|
||
|
|
||
|
def create_image(size, color=None):
    """Create a new RGBA image of ``size`` filled with ``color``.

    Returns None when ``size`` is None. A ``color`` of None means fully
    transparent black ``(0, 0, 0, 0)``.
    """
    if size is None:
        return None
    fill = (0,0,0,0) if color is None else color
    return Image.new("RGBA", size, fill)
|
||
|
|
||
|
def resize_image(img, size, crop=None):
    """Resize and / or crop ``img``.

    * ``img`` None -> None.
    * ``size`` given -> ``img.resize(size, box=crop)``.
    * ``size`` None -> ``img`` itself, or ``img.crop(box=crop)`` when a crop box is given.
    """
    if img is None:
        return None
    if size is not None:
        # todo: upscale with realesrgan when the target is larger than the source
        return img.resize(size, box=crop)
    if crop is None:
        return img
    return img.crop(box=crop)
|
||
|
|
||
|
def blend_image_at(dst, img, pos, rotation, center, blend_mode):
    """Blend ``img`` onto ``dst`` at a given position and rotation.

    Args:
        dst: destination image, or None to create one.
        img: image to blend; None returns ``dst`` unchanged.
        pos: position in ``dst`` at which the ``center`` point of ``img``
            is placed. Falsy: the centre of ``dst``, or, without ``dst``,
            a position that shows the whole rotated image.
        rotation: rotation in radians.
        center: pivot inside ``img``; falsy means the image centre.
        blend_mode: "alpha" / "mask" (alpha compositing) or the name of an
            ``ImageChops`` function such as "add" or "multiply"
            (case-insensitive).

    Returns:
        The destination image. ``dst`` is modified in place when given;
        otherwise a new image is returned.
    """
    if img is None:
        return dst

    supported_modes = ["alpha","mask","add","add_modulo","darker","difference","lighter","logical_and","logical_or","logical_xor","multiply","soft_light","hard_light","overlay","screen","subtract","subtract_modulo"]
    assert blend_mode.lower() in supported_modes, f"unsupported blend mode: {blend_mode}"
    blend_mode = blend_mode.lower()

    center = center or (img.size[0]*0.5, img.size[1]*0.5)
    # default position: the centre of dst, when there is a dst
    pos = pos or ((dst.size[0]*0.5, dst.size[1]*0.5) if dst is not None else None)

    # bounding box of the rotated image, relative to the pivot
    tf = pose_mat3((0,0), rotation)
    rect_points = transform_points(tf, get_rect(img) - center)
    min_x = min(p[0] for p in rect_points)
    min_y = min(p[1] for p in rect_points)
    max_x = max(p[0] for p in rect_points)
    max_y = max(p[1] for p in rect_points)
    new_size = (int(max_x - min_x), int(max_y - min_y))

    if pos is None:
        # only reachable without dst (see the default above):
        # choose pos so that the whole img is shown
        pos = (-min_x, -min_y)

    # top-left corner of the rotated image in dst coordinates
    min_x += pos[0]
    min_y += pos[1]

    if rotation != 0:
        img = img.rotate(
            angle = -rotation * (180 / math.pi),
            expand = True,
            fillcolor = (0,0,0,0)
        )

    if dst is None and img.size == new_size:
        return img.copy()
    if dst is None:
        dst = create_image(new_size)

    # split the offset into a destination offset (>= 0) and a source
    # offset that skips the part of img lying left of / above dst
    dx = int(min_x)
    dy = int(min_y)
    sx = -dx if (dx < 0) else 0
    sy = -dy if (dy < 0) else 0
    dx = max(0, dx)
    dy = max(0, dy)

    if blend_mode in ["alpha","mask"]:
        dst.alpha_composite(img, dest=(dx,dy), source=(sx,sy))
    else:
        w,h = img.size
        # crop boxes exclude their right / lower edge, so (w, h) keeps the
        # last column and row (the original used w-1, h-1 and lost them)
        img_crop = img.crop(box=(sx,sy,w,h))
        w,h = img_crop.size
        dst_crop = dst.crop(box=(dx,dy,dx+w,dy+h))
        blend_func = getattr(ImageChops, blend_mode)
        blended = blend_func(dst_crop, img_crop)
        dst.paste(blended,box=(dx,dy))
    return dst
|
||
|
|
||
|
def blend_objects(seeds, dst, objects):
    """Render ``objects`` in reverse order and blend each result onto ``dst``.

    Placement comes from the object's attributes: "pos" / "position",
    "rotation" / "rotate" / "angle", "center" and "blend" (default
    "alpha"). If blending fails, the error is logged together with the
    object and its image and then re-raised.

    Returns:
        A copy of the final destination image, or None if there is none.
    """
    for obj in reversed(objects):
        img = render_object(seeds, obj)
        try:
            placement = dict(
                pos = obj["pos"] or obj["position"] or None,
                rotation = obj["rotation"] or obj["rotate"] or obj["angle"] or 0,
                center = obj["center"] or None,
                blend_mode = obj["blend"] if "blend" in obj else "alpha",
            )
            dst = blend_image_at(dst = dst, img = img, **placement)
        except Exception as e:
            log_exception(f"Exception! blend_objects({dst}, {objects})")
            log_err("obj", obj)
            log_err("img", img)
            log_err("")
            raise e

    return dst.copy() if dst is not None else dst
|
||
|
|
||
|
def render_mask(seeds, obj, img, input_mask = None):
    """Build a mask from the ``mask_*`` attributes of ``obj`` and apply it to ``img``.

    The mask starts as ``input_mask`` if given, otherwise as the alpha
    channel of ``img``. It is then modified, in this order, by:
    ``mask_value``, ``mask_by_color`` / ``mask_by_color_at``,
    ``mask_depth``, ``mask_open``, ``mask_close``, ``mask_grow``,
    ``mask_shrink``, ``mask_blur`` and ``mask_invert``.

    Args:
        seeds: unused here.
        obj: scene object; ``obj[key]`` must yield None for unset attributes.
        img: image to mask (its alpha channel is replaced in place), or None.
        input_mask: optional initial mask.

    Returns:
        ``img`` when it is not None, otherwise the resulting mask (None if
        neither an image nor an input mask was given).

    Raises:
        KeyError: for an unknown ``mask_by_color_space``.
    """
    if img is None and input_mask is None:
        return img

    if input_mask is not None:
        mask = input_mask
        changed_mask = True
    else:
        # the guard above guarantees img is not None here
        mask = img.getchannel("A")
        changed_mask = False

    def attr_or(key, default):
        """Return obj[key], or ``default`` when it is unset (so an explicit 0 is kept)."""
        value = obj[key]
        return default if value is None else value

    if "mask_value" in obj:
        mask.paste(obj["mask_value"], mask.getbbox())
        changed_mask = True

    if ("mask_by_color" in obj or "mask_by_color_at" in obj) and img is not None:
        img_arr = np.asarray(img.convert("RGB"))
        color = obj["mask_by_color"]
        color_at = obj["mask_by_color_at"] or None
        if color_at is not None:
            # flat x0,y0,x1,y1,... list; a trailing unpaired value is ignored
            points = list(zip(color_at[0::2], color_at[1::2]))
            if len(points) > 0:
                colors = np.array([img_arr[y,x] for x,y in points])
                mean_color = np.round(colors.mean(axis=0)).astype(np.uint8).flatten()
                color = tuple(int(c) for c in mean_color)
        colorspace = (obj["mask_by_color_space"] or "LAB").upper()
        threshold = obj["mask_by_color_threshold"] or 15

        # 1x1 RGB image holding the reference colour (Image.new also
        # resolves colour names)
        reference_arr = np.asarray(Image.new("RGB", size=(1,1), color=color))
        if colorspace != "RGB":
            # keys are upper case because colorspace was upper-cased above
            cvts = {
                "LAB": cv2.COLOR_RGB2Lab,
                "LUV": cv2.COLOR_RGB2Luv,
                "HSV": cv2.COLOR_RGB2HSV,
                "HLS": cv2.COLOR_RGB2HLS,
                "YUV": cv2.COLOR_RGB2YUV,
                "GRAY": cv2.COLOR_RGB2GRAY,
                "XYZ": cv2.COLOR_RGB2XYZ,
                "YCRCB": cv2.COLOR_RGB2YCrCb,
            }
            conversion = cvts[colorspace]
            reference_arr = cv2.cvtColor(reference_arr, conversion)
            img_arr = cv2.cvtColor(img_arr, conversion)
        reference_color = np.asarray(reference_arr[0,0], dtype=np.float32)
        # atleast_3d: GRAY yields a 2D array, the distance below needs a channel axis
        img_arr = np.atleast_3d(img_arr.astype(np.float32))
        dist = np.max(np.abs(img_arr - reference_color), axis=2)
        mask_arr = (dist < threshold).astype(np.uint8) * 255
        mask = Image.fromarray(mask_arr)
        changed_mask = True

    if obj["mask_depth"]:
        mask_depth_min = attr_or("mask_depth_min", 0.2)
        mask_depth_max = attr_or("mask_depth_max", 0.8)
        mask_depth_invert = bool(obj["mask_depth_invert"])
        mask_is_depth = obj["mask_is_depth"] if "mask_is_depth" in obj else False
        mask_depth_normalize = obj["mask_depth_normalize"] if "mask_depth_normalize" in obj else True
        mask_depth_model = int(obj["mask_depth_model"]) if "mask_depth_model" in obj else 1
        depth = run_depth_estimation(img, mask_depth_model)
        res = run_depth_filter(depth, mask_depth_min, mask_depth_max, mask_depth_invert, mask_depth_normalize, mask_is_depth)
        if res is not None:
            mask = res.resize(img.size)
            changed_mask = True

    if "mask_open" in obj:
        # opening: erode (min) then dilate (max)
        mask = mask.filter(ImageFilter.MinFilter(obj["mask_open"]))
        mask = mask.filter(ImageFilter.MaxFilter(obj["mask_open"]))
        changed_mask = True

    if "mask_close" in obj:
        # closing: dilate (max) then erode (min)
        mask = mask.filter(ImageFilter.MaxFilter(obj["mask_close"]))
        mask = mask.filter(ImageFilter.MinFilter(obj["mask_close"]))
        changed_mask = True

    if "mask_grow" in obj:
        mask = mask.filter(ImageFilter.MaxFilter(obj["mask_grow"]))
        changed_mask = True

    if "mask_shrink" in obj:
        mask = mask.filter(ImageFilter.MinFilter(obj["mask_shrink"]))
        changed_mask = True

    if "mask_blur" in obj:
        mask = mask.filter(ImageFilter.GaussianBlur(obj["mask_blur"]))
        changed_mask = True

    if obj["mask_invert"]:
        mask = ImageChops.invert(mask)
        changed_mask = True

    if changed_mask and img is not None and mask is not None:
        img.putalpha(mask)

    return img if img is not None else mask
|
||
|
|
||
|
# Identity keys (id()) of every image already emitted, so that the same
# image object is never appended to the output list twice.
output_image_set = set()

def output_img(img):
    """Append *img* to ``output_images`` unless it was emitted before.

    Returns ``None`` for a ``None`` input and after a fresh append; returns
    *img* itself when that very object had already been recorded.
    """
    if img is None:
        return None
    key = id(img)
    if key not in output_image_set:
        output_image_set.add(key)
        output_images.append(img)
        return None
    return img
|
||
|
|
||
|
def render_intermediate(img, obj):
    """Emit (and possibly save) an intermediate result, then pass it through.

    When ``output_intermediates`` is set the image is handed to
    ``output_img`` and, unless ``skip_save`` is set, also written with
    ``save_sample_scn2img``.  *img* is always returned unchanged.
    """
    # NOTE(review): the nesting of the save step under `output_intermediates`
    # was reconstructed from a copy without indentation - confirm.
    if not output_intermediates:
        return img
    output_img(img)
    if skip_save:
        return img
    save_sample_scn2img(img, obj)
    return img
|
||
|
|
||
|
def render_3d(img, obj):
    """Optionally re-project *img* into a different camera pose.

    Nothing happens unless ``obj["transform3d"] == True``.  Otherwise a depth
    map is estimated for the image, its pixels are projected from the
    ``transform3d_from_*`` camera into the ``transform3d_to_*`` camera and,
    if ``transform3d_inpaint`` is on, the result is passed through
    ``cv2.inpaint`` using the inverted alpha channel as the inpaint mask.

    Poses are ``(x, y, z, roll, pitch, yaw)`` sequences; shorter ones are
    zero-padded and the three angles are multiplied by pi/180 before use.
    The hfov options are forwarded unchanged (their default is 45 degrees
    expressed in radians).

    Returns the transformed image, or *img* unchanged when it is ``None``,
    the transform is disabled, or no depth map could be estimated.
    """
    if img is None:
        return img
    if not (obj["transform3d"] == True):
        return img

    def option(name, default):
        # Scene objects are queried with `in` / item access only.
        key = "transform3d_" + name
        return obj[key] if key in obj else default

    d2r = math.pi / 180.0
    depth_model = option("depth_model", 1)
    depth_near = option("depth_near", 0.1)
    depth_scale = option("depth_scale", 1.0)
    from_hfov = option("from_hfov", 45 * d2r)
    from_pose = option("from_pose", (0, 0, 0, 0, 0, 0))
    to_hfov = option("to_hfov", 45 * d2r)
    to_pose = option("to_pose", (0, 0, 0, 0, 0, 0))
    min_mask = option("min_mask", 128)
    max_mask = option("max_mask", 255)
    mask_invert = option("mask_invert", False)
    inpaint = option("inpaint", True)
    inpaint_radius = option("inpaint_radius", 5)
    inpaint_method = option("inpaint_method", 0)
    inpaint_rmask = option("inpaint_restore_mask", False)

    # Pad partial poses with zeros so position and rpy can always be split.
    from_pose = list(from_pose)
    to_pose = list(to_pose)
    while len(from_pose) < 6: from_pose.append(0)
    while len(to_pose) < 6: to_pose.append(0)
    from_pos, from_rpy = from_pose[:3], from_pose[3:6]
    to_pos, to_rpy = to_pose[:3], to_pose[3:6]
    hfov0_rad, hfov1_rad = from_hfov, to_hfov
    tf_world_cam0 = pose3d_rpy(*from_pos, *(deg * d2r for deg in from_rpy))
    tf_world_cam1 = pose3d_rpy(*to_pos, *(deg * d2r for deg in to_rpy))

    depth = run_depth_estimation(img, depth_model)
    if depth is None:
        # The depth estimators return None when their model is unavailable;
        # feeding None into the projection kernel would only crash there.
        log_warn(f"render_3d({obj}): no depth map available, skipping transform3d")
        return img

    img = run_transform_image_3d_simple(
        img, depth, depth_near, depth_scale,
        hfov0_rad, tf_world_cam0,
        hfov1_rad, tf_world_cam1,
        min_mask, max_mask, mask_invert,
    )
    if inpaint:
        mask = img.getchannel("A")
        img_inpainted = cv2.inpaint(
            np.asarray(img.convert("RGB")),
            255 - np.asarray(mask),
            inpaint_radius,
            [cv2.INPAINT_TELEA, cv2.INPAINT_NS][inpaint_method]
        )
        img = Image.fromarray(img_inpainted).convert("RGBA")
        if inpaint_rmask:
            # Put back the pre-inpaint alpha channel.
            img.putalpha(mask)
    return img
|
||
|
|
||
|
def render_image(seeds, obj):
    """Render an ``image`` scene object.

    Creates a canvas from ``obj["size"]`` / ``obj["color"]``, blends the
    children onto it, then runs the mask, resize/crop, 3d-transform and
    intermediate-output steps in that order and returns the result.
    """
    canvas = create_image(obj["size"], obj["color"])
    blended = blend_objects(seeds, canvas, obj.children)
    masked = render_mask(seeds, obj, blended)
    resized = resize_image(masked, obj["resize"], obj["crop"])
    # if resized is None: log_warn(f"result of render_image({obj}) is None")
    transformed = render_3d(resized, obj)
    return render_intermediate(transformed, obj)
|
||
|
|
||
|
def prepare_img2img_kwargs(seeds, obj, img):
    """Build the keyword arguments for one ``img2img`` call.

    Starts from the entries of ``img2img_defaults`` that ``img2img`` accepts,
    flips ``mask_mode`` (``1 - value``), applies ``obj["size"]`` and every
    matching argument found on *obj*, resolves the seed (next value from
    *seeds* when the configured one is not valid, then an optional
    ``variation``) and finally attaches ``job_info``, ``fp`` and the init
    image (plus image/mask pair in "Mask" editor mode).
    """
    # log_trace(f"prepare_img2img_kwargs({obj}, {img})")
    accepted = function_args["img2img"]
    # img2img_kwargs.update(img2img_defaults)
    img2img_kwargs = {
        name: default
        for name, default in img2img_defaults.items()
        if name in accepted
    }

    if "mask_mode" in img2img_kwargs:
        img2img_kwargs["mask_mode"] = 1 - img2img_kwargs["mask_mode"]

    if "size" in obj:
        img2img_kwargs["width"] = obj["size"][0]
        img2img_kwargs["height"] = obj["size"][1]

    # Values given on the scene object win over the defaults.
    for name, _ in accepted.items():
        if name in obj:
            img2img_kwargs[name] = obj[name]

    if "toggles" in img2img_kwargs:
        img2img_kwargs["toggles"] = list(img2img_kwargs["toggles"])

    assert("seed" in img2img_kwargs)
    if "seed" in img2img_kwargs:
        configured = img2img_kwargs["seed"]
        img2img_kwargs["seed"] = int(configured) if is_seed_valid(configured) else next(seeds)

    log_info('img2img_kwargs["seed"]', img2img_kwargs["seed"])

    if "variation" in obj:
        variation = obj["variation"]
        if is_seed_valid(variation):
            base_seed = int(img2img_kwargs["seed"])
            variation = int(variation)
            varied = vary_seed(base_seed, variation)
            log_info(f"Using seed variation {variation}: {varied}")
            img2img_kwargs["seed"] = varied

    img2img_kwargs["job_info"] = job_info
    # img2img_kwargs["job_info"] = None
    img2img_kwargs["fp"] = fp
    img2img_kwargs["init_info"] = img
    if img2img_kwargs["image_editor_mode"] == "Mask":
        # The alpha channel doubles as the inpainting mask.
        img2img_kwargs["init_info_mask"] = {
            "image": img.convert("RGB").convert("RGBA"),
            "mask": img.getchannel("A")
        }
        # render_intermediate(img2img_kwargs["init_info_mask"]["mask"].convert("RGBA"), obj)
    log_info("img2img_kwargs")
    log_info(img2img_kwargs)

    return img2img_kwargs
|
||
|
|
||
|
def prepare_txt2img_kwargs(seeds, obj):
    """Build the keyword arguments for one ``txt2img`` call.

    Starts from the entries of ``txt2img_defaults`` that ``txt2img`` accepts,
    applies ``obj["size"]`` and every matching argument found on *obj*,
    resolves the seed (next value from *seeds* when the configured one is
    not valid, then an optional ``variation``) and attaches ``job_info``
    and ``fp``.
    """
    # log_trace(f"prepare_txt2img_kwargs({obj})")
    accepted = function_args["txt2img"]
    # txt2img_kwargs.update(txt2img_defaults)
    txt2img_kwargs = {
        name: default
        for name, default in txt2img_defaults.items()
        if name in accepted
    }

    if "size" in obj:
        txt2img_kwargs["width"] = obj["size"][0]
        txt2img_kwargs["height"] = obj["size"][1]

    # Values given on the scene object win over the defaults.
    for name, _ in accepted.items():
        if name in obj:
            txt2img_kwargs[name] = obj[name]

    if "toggles" in txt2img_kwargs:
        txt2img_kwargs["toggles"] = list(txt2img_kwargs["toggles"])

    assert("seed" in txt2img_kwargs)
    if "seed" in txt2img_kwargs:
        configured = txt2img_kwargs["seed"]
        txt2img_kwargs["seed"] = int(configured) if is_seed_valid(configured) else next(seeds)

    log_info('txt2img_kwargs["seed"]', txt2img_kwargs["seed"])

    if "variation" in obj:
        variation = obj["variation"]
        if is_seed_valid(variation):
            base_seed = int(txt2img_kwargs["seed"])
            variation = int(variation)
            varied = vary_seed(base_seed, variation)
            log_info(f"Using seed variation {variation}: {varied}")
            txt2img_kwargs["seed"] = varied

    txt2img_kwargs["job_info"] = job_info
    # txt2img_kwargs["job_info"] = None
    txt2img_kwargs["fp"] = fp

    log_info("txt2img_kwargs")
    log_info(txt2img_kwargs)

    return txt2img_kwargs
|
||
|
|
||
|
def render_img2img(seeds, obj):
    """Render an ``img2img`` scene object.

    The children are composed into an init image, ``img2img`` is run on it
    (results are memoised in ``scn2img_cache["cache"]`` under the object's
    cache hash), every generated image is emitted through ``output_img`` and
    the one chosen by ``obj["select"]`` continues through resize/crop, the
    3d transform and the intermediate-output step.
    """
    global scn2img_cache
    if obj["size"] is None:
        obj["size"] = (img2img_defaults["width"], img2img_defaults["height"])

    init_img = create_image(obj["size"], obj["color"])
    init_img = blend_objects(seeds, init_img, obj.children)
    init_img = render_mask(seeds, obj, init_img)
    init_img = render_intermediate(init_img, obj)

    img2img_kwargs = prepare_img2img_kwargs(seeds, obj, init_img)
    used_kwargs.append(("img2img", img2img_kwargs))

    # cache_key = hash(str((img2img_kwargs["seed"],obj)))
    cache_key = obj.cache_hash(
        seed = img2img_kwargs["seed"],
        exclude_args = {"select", "pos", "rotation"}
    )
    if cache_key not in scn2img_cache["cache"]:
        if job_info:
            n_before = len(job_info.images)
        generated, used_seed, gen_info, gen_stats = img2img(**img2img_kwargs)
        if job_info:
            # img2img also writes into job_info.images.  Only the images
            # added by this call are cached, and they are taken back out of
            # job_info.images again.
            assert(job_info.images == generated)
            generated = [new.convert("RGBA") for new in job_info.images[n_before:]]
            # pop() keeps job_info.images the very same list object.
            for _ in range(len(generated)):
                job_info.images.pop()
        scn2img_cache["cache"][cache_key] = generated, used_seed, gen_info, gen_stats

    outputs, _seed, _info, _stats = scn2img_cache["cache"][cache_key]

    for out in outputs:
        output_img(out)

    log_info("outputs", outputs)

    # Pick one output; with no outputs the init image is used unmodified.
    img = init_img
    if len(outputs) > 0:
        img = outputs[obj["select"] or 0]

    # img = render_mask(seeds, obj, img)
    img = resize_image(img, obj["resize"], obj["crop"])
    if img is None:
        log_warn(f"result of render_img2img({obj}) is None")
    img = render_3d(img, obj)
    return render_intermediate(img, obj)
|
||
|
|
||
|
def render_txt2img(seeds, obj):
    """Render a ``txt2img`` scene object.

    ``txt2img`` is run with the prepared arguments (results are memoised in
    ``scn2img_cache["cache"]`` under the object's cache hash), every
    generated image is emitted through ``output_img`` and the one chosen by
    ``obj["select"]`` continues through the mask, resize/crop, 3d-transform
    and intermediate-output steps.  With no outputs the pipeline continues
    with ``None``.
    """
    global scn2img_cache

    txt2img_kwargs = prepare_txt2img_kwargs(seeds, obj)
    used_kwargs.append(("txt2img", txt2img_kwargs))

    # cache_key = hash(str((txt2img_kwargs["seed"],obj)))
    cache_key = obj.cache_hash(
        seed = txt2img_kwargs["seed"],
        exclude_args = {"select", "pos", "rotation"}
    )
    if cache_key not in scn2img_cache["cache"]:
        if job_info:
            n_before = len(job_info.images)
        generated, used_seed, gen_info, gen_stats = txt2img(**txt2img_kwargs)
        if job_info:
            # txt2img also writes into job_info.images.  Only the images
            # added by this call are cached, and they are taken back out of
            # job_info.images again.
            assert(job_info.images == generated)
            generated = [new.convert("RGBA") for new in job_info.images[n_before:]]
            # pop() keeps job_info.images the very same list object.
            for _ in range(len(generated)):
                job_info.images.pop()
        scn2img_cache["cache"][cache_key] = generated, used_seed, gen_info, gen_stats

    outputs, _seed, _info, _stats = scn2img_cache["cache"][cache_key]

    for out in outputs:
        output_img(out)

    log_info("outputs", outputs)

    # Pick one output, or None when nothing was generated.
    img = outputs[obj["select"] or 0] if len(outputs) > 0 else None

    img = render_mask(seeds, obj, img)
    img = resize_image(img, obj["resize"], obj["crop"])
    if img is None:
        log_warn(f"result of render_txt2img({obj}) is None")
    img = render_3d(img, obj)
    return render_intermediate(img, obj)
|
||
|
|
||
|
def render_object(seeds, obj):
    """Dispatch *obj* to the renderer matching ``obj.func``.

    An ``initial_seed`` on the object replaces the seed generator for this
    object and everything below it.  A ``scene`` must have exactly one child,
    which is rendered in its place.  Unknown types add a message to
    ``comments`` and yield ``None``.
    """
    # log_trace(f"render_object({str(obj)})")
    if "initial_seed" in obj:
        seeds = gen_seeds(obj["initial_seed"])

    if obj.func == "scene":
        assert(len(obj.children) == 1)
        return render_object(seeds, obj.children[0])
    if obj.func == "image":
        return render_image(seeds, obj)
    if obj.func == "img2img":
        return render_img2img(seeds, obj)
    if obj.func == "txt2img":
        return render_txt2img(seeds, obj)

    comments.append(f"Got unexpected SceneObject type {obj.func}")
    return None
|
||
|
|
||
|
def render_scn2img(seeds, obj):
    """Render a top-level object and return the results as a list.

    A ``scn2img`` object yields one entry per child; any other object is
    rendered itself and yields a single entry.  An ``initial_seed`` on the
    object replaces the seed generator first.
    """
    if "initial_seed" in obj:
        seeds = gen_seeds(obj["initial_seed"])

    targets = obj.children if obj.func == "scn2img" else [obj]

    # The shared seed generator must be consumed by the children in a
    # deterministic order, hence the explicit in-order loop (the same
    # applies elsewhere).
    result = []
    for target in targets:
        result.append(render_object(seeds, target))
    return result
|
||
|
|
||
|
for img in render_scn2img(seeds, scene):
|
||
|
if output_intermediates:
|
||
|
# img already in output, do nothing here
|
||
|
pass
|
||
|
else:
|
||
|
output_img(img)
|
||
|
|
||
|
if skip_save:
|
||
|
# individual image save was skipped,
|
||
|
# we need to save them now
|
||
|
save_sample_scn2img(img, scene)
|
||
|
|
||
|
|
||
|
return output_images
|
||
|
|
||
|
|
||
|
start_time = time.time()
|
||
|
|
||
|
mem_mon = MemUsageMonitor('MemMon')
|
||
|
mem_mon.start()
|
||
|
|
||
|
used_kwargs = []
|
||
|
|
||
|
scene = parse_scene(prompt, comments)
|
||
|
log_info("scene")
|
||
|
log_info(scene)
|
||
|
# log_info("comments", comments)
|
||
|
|
||
|
render_scene(output_images, scene, gen_seeds(seed))
|
||
|
log_info("output_images", output_images)
|
||
|
# log_info("comments", comments)
|
||
|
|
||
|
# comments.append(str(scene))
|
||
|
mem_max_used, mem_total = mem_mon.read_and_stop()
|
||
|
time_diff = time.time()-start_time
|
||
|
|
||
|
|
||
|
output_infos = []
|
||
|
output_infos.append(("initial_seed", seed))
|
||
|
excluded_args = set(["job_info", "fp", "init_info", "init_info_mask", "prompt"])
|
||
|
if len(used_kwargs) > 0:
|
||
|
for func, kwargs in used_kwargs:
|
||
|
output_infos.append("\n")
|
||
|
output_infos.append(("", func))
|
||
|
output_infos.append(kwargs["prompt"])
|
||
|
for arg,value in kwargs.items():
|
||
|
if arg in excluded_args: continue
|
||
|
if value is None: continue
|
||
|
if type(value) == dict: continue
|
||
|
if type(value) == Image: continue
|
||
|
output_infos.append((arg,value))
|
||
|
|
||
|
full_string = ""
|
||
|
entities = []
|
||
|
for output_info in output_infos:
|
||
|
if type(output_info) == str:
|
||
|
full_string += output_info
|
||
|
else:
|
||
|
assert(type(output_info) is tuple)
|
||
|
k,v = output_info
|
||
|
label = f" {k}:" if len(k) > 0 else ""
|
||
|
entity = {
|
||
|
'entity': str(v),
|
||
|
'start': len(full_string),
|
||
|
'end': len(full_string) + len(label),
|
||
|
}
|
||
|
entities.append(entity)
|
||
|
full_string += label
|
||
|
|
||
|
info = {
|
||
|
'text': full_string,
|
||
|
'entities': entities
|
||
|
}
|
||
|
num_prompts = 1
|
||
|
stats = " ".join([
|
||
|
f"Took { round(time_diff, 2) }s total ({ round(time_diff/(num_prompts),2) }s per image)",
|
||
|
f"Peak memory usage: { -(mem_max_used // -1_048_576) } MiB / { -(mem_total // -1_048_576) } MiB / { round(mem_max_used/mem_total*100, 3) }%",
|
||
|
])
|
||
|
|
||
|
|
||
|
return output_images, seed, info, stats, repr(scene)
|
||
|
|
||
|
|
||
|
return scn2img
|
||
|
|
||
|
def run_monocular_depth_estimation_multi(images, minDepth=10, maxDepth=1000, batch_size=2):
    """Estimate depth for a batch of images with the Keras monocular model.

    See https://huggingface.co/keras-io/monocular-depth-estimation and
    https://huggingface.co/spaces/atsantiago/Monocular_Depth_Filter

    Args:
        images: a single PIL image, or an iterable of PIL images / arrays.
            PIL images are converted to RGB and resized to 640x480; anything
            else must already be reshapeable to (480, 640, 3).
        minDepth, maxDepth: clipping range applied to ``maxDepth / prediction``
            before the result is divided by ``maxDepth``.
        batch_size: forwarded to the model's ``predict``.

    Returns:
        The model predictions mapped as described above, or ``None`` when
        *images* is ``None`` or the model could not be loaded.
    """
    global monocular_depth_estimation
    if images is None:
        return None
    if monocular_depth_estimation is None:
        try_loading_monocular_depth_estimation()
    if monocular_depth_estimation is None:
        return None

    # `Image` is the PIL module, so the image class is `Image.Image`;
    # comparing type(images) against the module itself never matched.
    if isinstance(images, Image.Image):
        images = [images]

    loaded_images = []
    for image in images:
        if isinstance(image, Image.Image):
            image = image.convert("RGB").resize((640, 480))
        pixels = np.asarray(image)
        loaded_images.append(np.clip(pixels.reshape(480, 640, 3) / 255, 0, 1))
    # The reshape above fixes every entry to (480, 640, 3), so the stacked
    # batch is always (n, 480, 640, 3) and needs no further shape fix-ups.
    batch = np.stack(loaded_images, axis=0)

    # Compute predictions
    predictions = monocular_depth_estimation.predict(batch, batch_size=batch_size)

    # Put in expected range
    depths = np.clip(maxDepth / predictions, minDepth, maxDepth) / maxDepth
    return depths
|
||
|
|
||
|
def run_monocular_depth_estimation_single(image, minDepth=10, maxDepth=1000):
    """Estimate depth for one image with the Keras monocular model.

    Returns channel 0 of the first (only) batch entry, or ``None`` when
    *image* is ``None`` or the batch estimator returns ``None`` (which it
    does when the model could not be loaded).
    """
    if image is None:
        return None
    depths = run_monocular_depth_estimation_multi([image], minDepth, maxDepth)
    if depths is None:
        # Propagate "no model" instead of failing on None[0].
        return None
    return depths[0][:,:,0]
|
||
|
|
||
|
def run_Monocular_Depth_Filter_multi(images, filter_min_depth:float, filter_max_depth:float, invert:bool, normalize_depth:bool, mask_is_depth:bool, **kwargs):
    """Estimate depth for *images* and turn each depth map into a mask.

    See https://huggingface.co/spaces/atsantiago/Monocular_Depth_Filter

    Extra keyword arguments go to ``run_monocular_depth_estimation_multi``.
    Returns one ``run_depth_filter`` result per image, or ``None`` when no
    depth could be estimated.
    """
    depths = run_monocular_depth_estimation_multi(images, **kwargs)
    if depths is None:
        return None
    # The estimator output is expected to be 4-dimensional; only the batch
    # size is needed here.
    count, _height, _width, _channels = depths.shape
    # print("run_Monocular_Depth_Filter n,h,w,c", count, _height, _width, _channels)
    return [
        run_depth_filter(depths[idx][:,:,0], filter_min_depth, filter_max_depth, invert, normalize_depth, mask_is_depth)
        for idx in range(count)
    ]
|
||
|
|
||
|
def run_Monocular_Depth_Filter_single(image, filter_min_depth:float, filter_max_depth:float, invert:bool, normalize_depth:bool, mask_is_depth:bool, **kwargs):
    """Estimate depth for one image and turn it into a mask.

    Returns the single mask produced by ``run_Monocular_Depth_Filter_multi``,
    or ``None`` when that function returns ``None``.
    """
    masks = run_Monocular_Depth_Filter_multi([image], filter_min_depth, filter_max_depth, invert, normalize_depth, mask_is_depth, **kwargs)
    if masks is None:
        # Propagate "no depth available" instead of failing on None[0].
        return None
    return masks[0]
|
||
|
|
||
|
|
||
|
def run_midas_depth_estimation(image):
    """Estimate depth for one PIL image with the MiDaS model on the CPU.

    Returns ``1 - prediction / max(prediction)`` resized (bicubic) to the
    image's height and width, or ``None`` when *image* is ``None`` or the
    model/transform could not be loaded.
    """
    global midas_depth_estimation
    global midas_transform
    if image is None:
        return None

    def model_missing():
        return midas_depth_estimation is None or midas_transform is None

    if model_missing():
        try_loading_midas_depth_estimation()
    if model_missing():
        return None

    rgb = np.asarray(image.convert("RGB"))

    device = "cpu"
    input_batch = midas_transform(rgb).to(device)
    with torch.no_grad():
        raw = midas_depth_estimation(input_batch)
        # Scale the prediction back to the input resolution.
        resized = torch.nn.functional.interpolate(
            raw.unsqueeze(1),
            size=rgb.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    output = resized.cpu().numpy()
    return 1 - output / np.max(output)
|
||
|
|
||
|
def run_midas_depth_filter(image, filter_min_depth:float, filter_max_depth:float, invert:bool, normalize_depth:bool, mask_is_depth:bool):
    """Estimate depth with MiDaS and convert it to a mask via ``run_depth_filter``."""
    return run_depth_filter(
        run_midas_depth_estimation(image),
        filter_min_depth,
        filter_max_depth,
        invert,
        normalize_depth,
        mask_is_depth,
    )
|
||
|
|
||
|
|
||
|
def run_depth_filter(depth: np.ndarray, filter_min_depth:float, filter_max_depth:float, invert:bool, normalize_depth:bool, mask_is_depth:bool):
    """Convert a depth map into an 8-bit ("L" mode) PIL mask.

    Args:
        depth: depth map, or ``None``.
        filter_min_depth, filter_max_depth: depth band of interest.
        invert: return ``255 - mask`` instead of the mask.
        normalize_depth: shift/scale the depth map to start at 0 and peak at
            1 before filtering.
        mask_is_depth: if true the mask is the depth rescaled so that the
            band maps to 0..255 (clamped); otherwise it is 255 strictly
            inside the band and 0 elsewhere.

    Returns:
        The mask image, or ``None`` when *depth* is ``None``.

    Raises:
        ZeroDivisionError: may be raised in ``mask_is_depth`` mode when both
            band limits are equal Python numbers.
    """
    if depth is None:
        return None

    if normalize_depth:
        depth = depth - np.min(depth)
        peak = np.max(depth)
        # A constant depth map has zero range; dividing would give 0/0 = NaN
        # everywhere, so it is left at all zeros instead.
        if peak > 0:
            depth = depth / peak

    if mask_is_depth:
        depth = (depth - filter_min_depth) * (1.0/(filter_max_depth - filter_min_depth))
        depth[depth < 0] = 0
        depth[depth > 1] = 1
        mask = (depth*255).astype(np.uint8)
    else:
        inside_band = np.logical_and(depth > filter_min_depth, depth < filter_max_depth)
        mask = inside_band.astype(np.uint8) * 255

    if invert:
        mask = 255-mask

    return Image.fromarray(mask,"L")
|
||
|
|
||
|
def run_depth_estimation(image:Image, model_idx:int):
    """Run the depth estimator selected by *model_idx* on *image*.

    Index 0 is the Keras monocular model, index 1 is MiDaS.
    """
    estimators = [run_monocular_depth_estimation_single, run_midas_depth_estimation]
    return estimators[model_idx](image)
|
||
|
|
||
|
@numba.jit
def depth_reprojection(xyz:np.ndarray, depth:np.ndarray, depth_scale:float, fx:float, fy:float, cx:float, cy:float):
    """Fill *xyz* with one 3d point per pixel of *depth*.

    For pixel (row, col): ``z = depth * depth_scale``,
    ``x = fx * (col - cx) * z`` and ``y = fy * (row - cy) * z``.
    The result is written into ``xyz[row, col, 0:3]``; nothing is returned.
    """
    rows, cols = depth.shape[:2]
    for row in range(rows):
        ray_y = fy*(row - cy)
        for col in range(cols):
            ray_x = fx*(col - cx)
            dist = depth[row,col] * depth_scale
            xyz[row,col,0] = ray_x*dist
            xyz[row,col,1] = ray_y*dist
            xyz[row,col,2] = dist
|
||
|
|
||
|
def run_3d_estimation(depth:np.ndarray, depth_scale:float=1, hfov_rad:float=60*math.pi/180):
    """Back-project a depth map into an (h, w, 3) float32 array of 3d points.

    Args:
        depth: depth map; its first two dimensions are height and width.
        depth_scale: factor applied to every depth value.
        hfov_rad: horizontal field of view of the assumed camera, in radians.

    Returns:
        Array with the x, y, z coordinates of every pixel.
    """
    h,w = depth.shape[:2]
    # CameraInfo takes (width, height) - it reads width from index 0.
    cam_info = CameraInfo((w,h), hfov_rad)
    xyz = np.empty(shape=(h, w, 3), dtype=np.float32)
    depth_reprojection(xyz, depth, depth_scale, cam_info.fx, cam_info.fy, cam_info.cx, cam_info.cy)
    return xyz
|
||
|
|
||
|
@numba.jit
def transform_image_3d(img_out:np.ndarray, img_in:np.ndarray, depth:np.ndarray, depth_near:float, depth_scale:float,
                       fx0:float, fy0:float, cx0:float, cy0:float,
                       fx1:float, fy1:float, cx1:float, cy1:float,
                       rot_cam1_cam0: np.ndarray, offset_cam1_cam0: np.ndarray,
                       min_mask:int, max_mask:int):
    """Forward-project every pixel of *img_in* from camera 0 into camera 1.

    Each source pixel is lifted to 3d with its depth
    (``z0 = depth_near + depth * depth_scale``), moved into the camera-1
    frame with ``rot_cam1_cam0`` / ``offset_cam1_cam0`` and written to the
    pixel of *img_out* it projects to.  Points with ``z1 <= 0`` are dropped;
    targets outside the output are clamped onto its border.  There is no
    depth test: when several source pixels hit the same target, the one
    visited last wins.  Nothing is returned; *img_out* is modified in place.

    NOTE(review): ``min_mask`` and ``max_mask`` are currently unused - the
    alpha-range check below is commented out.
    NOTE(review): the loops run over ``depth.shape`` while indexing
    *img_in*; this presumably requires both to have the same height/width -
    confirm against the callers.
    """
    # assert(img_in.shape[2] == 4)
    # assert(img_out.shape[2] == 4)
    # assert(len(depth.shape) == 2)
    # (u0,v0) : 2d pixel position in img_in
    # pos_cam0 : 3d pixel position in cam0 coordinate system
    # pos_cam1 : 3d pixel position in cam1 coordinate system
    # (u1,v1) : 2d pixel position in img_out
    # Unpack the rotation into scalars so the inner loop is plain arithmetic.
    m00 = rot_cam1_cam0[0,0]
    m01 = rot_cam1_cam0[0,1]
    m02 = rot_cam1_cam0[0,2]
    m10 = rot_cam1_cam0[1,0]
    m11 = rot_cam1_cam0[1,1]
    m12 = rot_cam1_cam0[1,2]
    m20 = rot_cam1_cam0[2,0]
    m21 = rot_cam1_cam0[2,1]
    m22 = rot_cam1_cam0[2,2]
    h0 = int(depth.shape[0])
    w0 = int(depth.shape[1])
    h1 = int(img_out.shape[0])
    w1 = int(img_out.shape[1])
    for v0 in range(h0):
        # Per-row part of the viewing ray; it does not depend on the column.
        y0_ = fy0*(v0 - cy0)
        for u0 in range(w0):
            r,g,b,a = img_in[v0,u0]
            # img_out[v0,u0,0] = r
            # img_out[v0,u0,1] = g
            # img_out[v0,u0,2] = b
            # img_out[v0,u0,3] = a
            # continue
            # if not (min_mask <= a <= max_mask): continue
            x0_ = fx0*(u0 - cx0)
            z0 = depth_near + depth[v0,u0] * depth_scale
            x0 = x0_ * z0
            y0 = y0_ * z0
            # Rigid transform of the 3d point from the cam0 to the cam1 frame.
            x1 = offset_cam1_cam0[0] + m00*x0 + m01*y0 + m02*z0
            y1 = offset_cam1_cam0[1] + m10*x0 + m11*y0 + m12*z0
            z1 = offset_cam1_cam0[2] + m20*x0 + m21*y0 + m22*z0
            # pos_cam0 = (x0*z0,y0*z0,z0)
            # pos_cam1 = offset_cam1_cam0 + rot_cam1_cam0 @ pos_cam0
            # x1,y1,z1 = pos_cam1
            # Skip points that are not in front of camera 1.
            if z1 <= 0: continue
            # Project into cam1 pixel coordinates; the +0.5 rounds before int().
            u1 = int(0.5 + (x1/(z1*fx1))+cx1)
            v1 = int(0.5 + (y1/(z1*fy1))+cy1)
            # Clamp out-of-frame targets onto the image border.
            if u1 < 0: u1 = 0
            if u1 >= w1: u1 = w1-1
            if v1 < 0: v1 = 0
            if v1 >= h1: v1 = h1-1
            # if not (0 <= u1 < w1): continue
            # if not (0 <= v1 < h1): continue
            img_out[v1,u1,0] = r
            img_out[v1,u1,1] = g
            img_out[v1,u1,2] = b
            img_out[v1,u1,3] = a
|
||
|
|
||
|
class CameraInfo:
    """Simple pinhole camera description used by the 3d transform.

    Attributes set by the constructor:
        width, height: image size in pixels.
        aspect_ratio: ``width / height``.
        hfov_rad, vfov_rad: horizontal field of view as given, and the
            vertical one computed as ``hfov_rad / aspect_ratio``.
        fx, fy: ``tan(fov / 2) / half_size`` per axis, i.e. the factor that
            turns a pixel offset from the centre into a ray slope.
        cx, cy: image centre (half the width / half the height).
        pose: 4x4 transformation matrix of the camera (identity by default).
    """

    def __init__(self, image_size:Tuple[int,int], hfov_rad:float=60*math.pi/180, pose:np.ndarray=None):
        """Create a camera for an image of ``image_size = (width, height)``.

        Args:
            image_size: ``(width, height)`` in pixels.
            hfov_rad: horizontal field of view in radians.
            pose: optional 4x4 pose matrix; identity when omitted.
        """
        self.width = image_size[0]
        self.height = image_size[1]
        self.aspect_ratio = self.width * (1.0 / self.height)
        self.hfov_rad = hfov_rad
        self.vfov_rad = self.hfov_rad / self.aspect_ratio
        half_width = self.width * 0.5
        # The vertical centre/scale must come from the height; deriving it
        # from the width is only right for square images.
        half_height = self.height * 0.5
        self.fx = math.tan(self.hfov_rad*0.5) / half_width
        self.fy = math.tan(self.vfov_rad*0.5) / half_height
        self.cx = half_width
        self.cy = half_height
        self.pose = pose if pose is not None else np.eye(4)
        assert(self.pose.shape==(4,4))
|
||
|
|
||
|
def run_transform_image_3d(image:Image, depth:np.ndarray, depth_near:float, depth_scale:float, from_caminfo: CameraInfo, to_caminfo: CameraInfo, min_mask:int, max_mask:int, mask_invert:bool):
    """Re-project *image* from one camera into another using its depth map.

    Args:
        image: PIL image (converted to RGBA), or ``None``.
        depth: depth map passed to ``transform_image_3d``.
        depth_near, depth_scale: depth offset and factor used by the kernel.
        from_caminfo, to_caminfo: source and target cameras; their ``pose``
            matrices give the relative transform.
        min_mask, max_mask: forwarded to the kernel.
        mask_invert: invert the alpha channel of the result.

    Returns:
        A new RGBA image of the same size as *image*; pixels that received
        no source pixel stay zero.  ``None`` when *image* is ``None``.
    """
    if image is None: return None
    # PIL reports size as (width, height); arrays are indexed (row, column).
    w,h = image.size
    image_in = np.asarray(image.convert("RGBA"))
    image_out = np.zeros(shape=(h,w,4),dtype=np.uint8)

    # Relative transform cam0 -> cam1, split into rotation and offset.
    tf_world_cam0 = from_caminfo.pose
    tf_world_cam1 = to_caminfo.pose
    tf_cam1_world = affine_inv(tf_world_cam1)
    tf_cam1_cam0 = tf_cam1_world @ tf_world_cam0
    rot_cam1_cam0 = tf_cam1_cam0[:3,:3]
    offset_cam1_cam0 = tf_cam1_cam0[:3,3]

    transform_image_3d(
        image_out, image_in, depth, depth_near, depth_scale,
        from_caminfo.fx, from_caminfo.fy, from_caminfo.cx, from_caminfo.cy,
        to_caminfo.fx, to_caminfo.fy, to_caminfo.cx, to_caminfo.cy,
        rot_cam1_cam0, offset_cam1_cam0,
        min_mask, max_mask
    )
    if mask_invert:
        image_out[:,:,3] = 255 - image_out[:,:,3]
    return Image.fromarray(image_out,"RGBA")
|
||
|
|
||
|
def run_transform_image_3d_simple(image:Image, depth:np.ndarray, depth_near:float, depth_scale:float,
                                  hfov0_rad:float, tf_world_cam0: np.ndarray,
                                  hfov1_rad:float, tf_world_cam1: np.ndarray,
                                  min_mask:int, max_mask:int, mask_invert:bool):
    """Convenience wrapper around ``run_transform_image_3d``.

    Builds the source and target ``CameraInfo`` from ``image.size`` and the
    given field of view / pose pairs, then delegates.
    """
    cameras = [
        CameraInfo(image.size, hfov_rad, pose)
        for hfov_rad, pose in ((hfov0_rad, tf_world_cam0), (hfov1_rad, tf_world_cam1))
    ]
    source_cam, target_cam = cameras
    return run_transform_image_3d(
        image, depth, depth_near, depth_scale,
        source_cam, target_cam,
        min_mask, max_mask, mask_invert,
    )
|
||
|
|
||
|
def translation3d(x,y,z):
    """Return the 4x4 homogeneous matrix that translates by (x, y, z)."""
    row_x = [1, 0, 0, x]
    row_y = [0, 1, 0, y]
    row_z = [0, 0, 1, z]
    row_w = [0, 0, 0, 1]
    return np.array([row_x, row_y, row_z, row_w])
|
||
|
|
||
|
def rotation3d_x(angle):
    """Return the 4x4 homogeneous rotation about the x axis by *angle* radians."""
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    rows = [
        [1, 0, 0, 0],
        [0, cos_a, -sin_a, 0],
        [0, +sin_a, cos_a, 0],
        [0, 0, 0, 1],
    ]
    return np.array(rows)
|
||
|
def rotation3d_y(angle):
    """Return the 4x4 homogeneous rotation about the y axis by *angle* radians."""
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    rows = [
        [cos_a, 0, +sin_a, 0],
        [0, 1, 0, 0],
        [-sin_a, 0, cos_a, 0],
        [0, 0, 0, 1],
    ]
    return np.array(rows)
|
||
|
def rotation3d_z(angle):
    """Return the 4x4 homogeneous rotation about the z axis by *angle* radians."""
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    rows = [
        [cos_a, -sin_a, 0, 0],
        [+sin_a, cos_a, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
    ]
    return np.array(rows)
|
||
|
|
||
|
def rotation3d_rpy(roll, pitch, yaw):
    """Return the 4x4 rotation ``Rz(yaw) @ Ry(pitch) @ Rx(roll)``.

    Follows Diebel, J. (2006). Representing attitude: Euler angles, unit
    quaternions, and rotation vectors. Matrix, 58(15-16), 1-35
    (euler-1-2-3 scheme).  The paper uses the inverse transformations,
    i.e. world to body; this matrix transforms from body to world.
    """
    about_z = rotation3d_z(yaw)
    about_y = rotation3d_y(pitch)
    about_x = rotation3d_x(roll)
    return about_z @ about_y @ about_x
|
||
|
|
||
|
def rpy_from_rotation3d(mat):
    """Extract ``[roll, pitch, yaw]`` from a rotation matrix.

    Follows Diebel, J. (2006). Representing attitude: Euler angles, unit
    quaternions, and rotation vectors. Matrix, 58(15-16), 1-35
    (euler-1-2-3 scheme).  The paper uses the inverse transformations,
    i.e. world to body, which is why the transposed matrix is read.
    """
    world_to_body = mat.T
    angles = [
        np.arctan2(world_to_body[1,2], world_to_body[2,2]),   # roll
        -np.arcsin(world_to_body[0,2]),                       # pitch
        np.arctan2(world_to_body[0,1], world_to_body[0,0]),   # yaw
    ]
    return np.array(angles)
|
||
|
|
||
|
def affine_inv(mat44):
    """Invert a rigid 4x4 transform without a general matrix inverse.

    Uses the transposed 3x3 rotation block and ``-R^T @ t`` as translation,
    and rebuilds the matrix with ``pose3d``.
    """
    rot_t = mat44[:3,:3].T
    shift = mat44[:3,3]
    return pose3d(rot_t, -rot_t@shift)
|
||
|
|
||
|
def pose3d(rotation, translation):
    """Assemble a 4x4 homogeneous transform from rotation and translation.

    Args:
        rotation: 3x3 rotation block; its dtype is used for the result.
        translation: 3 values for the last column.

    Returns:
        4x4 matrix ``[[R, t], [0, 0, 0, 1]]``.
    """
    mat44 = np.zeros(shape=(4,4),dtype=rotation.dtype)
    mat44[:3,:3] = rotation
    mat44[:3,3] = translation
    # Homogeneous coordinate: without it the matrix is singular and drops
    # the translation of any transform it is multiplied with from the left.
    mat44[3,3] = 1
    return mat44
|
||
|
|
||
|
def pose3d_rpy(x, y, z, roll, pitch, yaw):
    """returns transformation matrix which transforms from pose to world"""
    shift = translation3d(x,y,z)
    turn = rotation3d_rpy(roll, pitch, yaw)
    return shift @ turn
|