Open ArtLeav opened 4 months ago
My attempt to solve it looks like this:
# @title ## **3.4. Bucketing and Latents Caching**
# Restore variables saved by earlier notebook cells via %store
# (training_dir, train_data_dir, model_path, vae_path, finetune_dir, ...)
# — TODO confirm which cells store them.
%store -r
# @markdown This code will create buckets based on the `bucket_resolution` provided for multi-aspect ratio training, and then convert all images within the `train_data_dir` to latents.
# Output metadata paths: cleaned caption metadata, then bucket/latent metadata.
bucketing_json = os.path.join(training_dir, "meta_lat.json")
metadata_json = os.path.join(training_dir, "meta_clean.json")
bucket_resolution = 1344 # @param {type:"slider", min:512, max:2048, step:64}
bucket_reso_steps = 32 # @param {type:"slider", min:64, max:2048, step:32}
min_bucket_reso = 512 # @param {type:"slider", min:512, max:1600, step:64}
max_bucket_reso = 1536 # @param {type:"slider", min:512, max:1600, step:64}
mixed_precision = "no" # @param ["no", "fp16", "bf16"] {allow-input: false}
skip_existing = False # @param{type:"boolean"}
flip_aug = False # @param{type:"boolean"}
# @markdown Use `clean_caption` option to clean such as duplicate tags, `women` to `girl`, etc
clean_caption = True #@param {type:"boolean"}
#@markdown Use the `recursive` option to process subfolders as well
recursive = True #@param {type:"boolean"}
# CLI arguments for merge_all_to_metadata.py. Keys starting with "_" are
# emitted as bare positional arguments by generate_args(); all other keys
# become --key[=value] flags.
metadata_config = {
"_train_data_dir": train_data_dir,
"_out_json": metadata_json,
"recursive": recursive,
"full_path": recursive,
"clean_caption": clean_caption
}
# CLI arguments for prepare_buckets_latents.py (same "_" prefix convention).
bucketing_config = {
"min_bucket_reso": f"{min_bucket_reso}",
"max_bucket_reso": f"{max_bucket_reso}",
"_train_data_dir": train_data_dir,
"_in_json": metadata_json,
"_out_json": bucketing_json,
# Prefer a standalone VAE for latent encoding when one is configured.
"_model_name_or_path": vae_path if vae_path else model_path,
"recursive": recursive,
"full_path": recursive,
"flip_aug": flip_aug,
"skip_existing": skip_existing,
"batch_size": 1,
"max_data_loader_n_workers": 2,
# Square max resolution "W, H" built from the slider value.
"max_resolution": f"{bucket_resolution}, {bucket_resolution}",
"mixed_precision": mixed_precision,
}
def generate_args(config):
    """Build a shell argument string from a config dict.

    Keys starting with "_" are emitted as quoted positional arguments;
    all other keys become CLI flags:
      * str          -> --key="value"
      * bool (True)  -> --key          (False is omitted entirely)
      * int / float  -> --key=value

    Returns the arguments joined by single spaces, with no trailing space.
    """
    parts = []
    for key, value in config.items():
        if key.startswith("_"):
            # Positional argument; quoted so paths with spaces survive.
            parts.append(f'"{value}"')
        elif isinstance(value, bool):
            # Handle bool before the numeric case: bool is a subclass of int.
            if value:
                parts.append(f"--{key}")
        elif isinstance(value, str):
            parts.append(f'--{key}="{value}"')
        elif isinstance(value, (int, float)):
            # Single branch replaces the duplicated float/int branches of the
            # original (a float can never be a bool, so no extra guard needed).
            parts.append(f"--{key}={value}")
    return " ".join(parts)
# Turn the config dicts into CLI strings and run both finetune scripts
# from finetune_dir (the scripts live there).
merge_metadata_args = generate_args(metadata_config)
prepare_buckets_args = generate_args(bucketing_config)
merge_metadata_command = f"python merge_all_to_metadata.py {merge_metadata_args}"
prepare_buckets_command = f"python prepare_buckets_latents.py {prepare_buckets_args}"
os.chdir(finetune_dir)
# IPython "!" magic: run the command in a subshell.
!{merge_metadata_command}
time.sleep(1)
!{prepare_buckets_command}
but it upscales 1024x1024 images to 1536.
I made a simple script to resize pictures, but I don't know how to integrate it into the latent bucketing step:
from PIL import Image
import os
# Source and destination folders for the offline resize pass.
src_dir = r'C:\Path\to\Images\Input'
dst_dir = r'C:\Path\to\Images\Output'
# JPEG save quality (0-100) passed to Image.save.
quality_val = 100
# Target bucket resolutions as (width, height), each roughly 1024*1024 pixels,
# covering portrait, square and landscape aspect ratios.
resolutions = [(1024, 1024), (896, 1152), (832, 1216), (768, 1344), (640, 1536), (1152, 896), (1216, 832), (1344, 768), (1536, 640)]
def resize_and_crop(img, size):
    """Resize *img* to cover *size* = (width, height), then center-crop.

    The image is scaled (LANCZOS) so that the target rectangle is fully
    covered while preserving aspect ratio, and the overflow along the
    longer axis is cropped symmetrically.

    Fix over the original: the crop box is computed with integer
    coordinates (// 2, then offset + target size) instead of float / 2,
    so the result is guaranteed to be exactly *size* — float boxes could
    truncate to an off-by-one output dimension.
    """
    img_ratio = img.size[0] / float(img.size[1])
    target_ratio = size[0] / float(size[1])
    if target_ratio > img_ratio:
        # Target is relatively wider: match widths, crop excess height.
        scaled_h = int(size[0] * img.size[1] / img.size[0])
        img = img.resize((size[0], scaled_h), Image.LANCZOS)
        top = (img.size[1] - size[1]) // 2
        img = img.crop((0, top, img.size[0], top + size[1]))
    elif target_ratio < img_ratio:
        # Target is relatively taller: match heights, crop excess width.
        scaled_w = int(size[1] * img.size[0] / img.size[1])
        img = img.resize((scaled_w, size[1]), Image.LANCZOS)
        left = (img.size[0] - size[0]) // 2
        img = img.crop((left, 0, left + size[0], img.size[1]))
    else:
        # Aspect ratios already match: a plain resize suffices.
        img = img.resize((size[0], size[1]), Image.LANCZOS)
    return img
def closest(lst, K):
    """Return the (width, height) pair in *lst* whose aspect ratio w/h is
    nearest to *K*.

    Uses min() with a key over the items directly instead of the original
    index-based min-over-range; ties resolve to the first minimum either way.
    """
    return min(lst, key=lambda res: abs(res[0] / res[1] - K))
# Convert every supported image in src_dir to its closest bucket resolution
# and save it as JPEG into dst_dir.
for filename in os.listdir(src_dir):
    if filename.endswith(('.jpg', '.png', '.jpeg')):  # add file types as needed
        # "with" closes the file handle promptly — the original leaked
        # one open file per image.
        with Image.open(os.path.join(src_dir, filename)) as img:
            closest_res = closest(resolutions, img.size[0] / img.size[1])
            new_img = resize_and_crop(img, closest_res)
        # JPEG cannot store an alpha channel.
        if new_img.mode == 'RGBA':
            new_img = new_img.convert('RGB')
        new_filename = f'{os.path.splitext(filename)[0]}.jpg'
        new_img.save(os.path.join(dst_dir, new_filename), quality=quality_val)
I just set the `bucket_no_upscale` flag, which fixed it, and just in case I also changed the max resolution:
But the weird thing was that if my training resolution was set to 1472, for example, I started to get really bad results after just a few hundred steps. So for now I'm keeping the training resolution at 1024px and the output is fine. But I'm not sure what the code is now doing internally with images that are larger.
# @title ## **3.4. Bucketing and Latents Caching**
# Restore variables saved by earlier notebook cells via %store
# (training_dir, train_data_dir, model_path, finetune_dir, ...)
# — TODO confirm which cells store them.
%store -r
# @markdown This code will create buckets based on the `bucket_resolution` provided for multi-aspect ratio training, and then convert all images within the `train_data_dir` to latents.
# Output metadata paths: cleaned caption metadata, then bucket/latent metadata.
bucketing_json = os.path.join(training_dir, "meta_lat.json")
metadata_json = os.path.join(training_dir, "meta_clean.json")
bucket_resolution = 1472 # @param {type:"slider", min:512, max:2048, step:32}
mixed_precision = "bf16" # @param ["no", "fp16", "bf16"] {allow-input: false}
flip_aug = False # @param{type:"boolean"}
# @markdown Use `clean_caption` option to clean such as duplicate tags, `women` to `girl`, etc
clean_caption = False #@param {type:"boolean"}
#@markdown Use the `recursive` option to process subfolders as well
recursive = False #@param {type:"boolean"}
skip_existing = True #@param {type: "boolean"}
# When enabled the script will not upscale images beyond their native size.
bucket_no_upscale = True #@param {type: "boolean"}
# CLI arguments for merge_all_to_metadata.py. Keys starting with "_" are
# emitted as bare positional arguments by generate_args(); all other keys
# become --key[=value] flags.
metadata_config = {
"_train_data_dir": train_data_dir,
"_out_json": metadata_json,
"recursive": recursive,
"full_path": recursive,
"clean_caption": clean_caption
}
# CLI arguments for prepare_buckets_latents.py (same "_" prefix convention).
bucketing_config = {
"_train_data_dir": train_data_dir,
"_in_json": metadata_json,
"_out_json": bucketing_json,
"_model_name_or_path": model_path,
"recursive": recursive,
"full_path": recursive,
"flip_aug": flip_aug,
"batch_size": 24,
"max_data_loader_n_workers": 8,
# Square max resolution "W, H" built from the slider value.
"max_resolution": f"{bucket_resolution}, {bucket_resolution}",
"mixed_precision": mixed_precision,
"skip_existing": skip_existing,
"bucket_no_upscale": bucket_no_upscale
}
def generate_args(config):
    """Build a shell argument string from a config dict.

    Keys starting with "_" are emitted as quoted positional arguments;
    all other keys become CLI flags:
      * str          -> --key="value"
      * bool (True)  -> --key          (False is omitted entirely)
      * int / float  -> --key=value

    Returns the arguments joined by single spaces, with no trailing space.
    """
    parts = []
    for key, value in config.items():
        if key.startswith("_"):
            # Positional argument; quoted so paths with spaces survive.
            parts.append(f'"{value}"')
        elif isinstance(value, bool):
            # Handle bool before the numeric case: bool is a subclass of int.
            if value:
                parts.append(f"--{key}")
        elif isinstance(value, str):
            parts.append(f'--{key}="{value}"')
        elif isinstance(value, (int, float)):
            # Single branch replaces the duplicated float/int branches of the
            # original (a float can never be a bool, so no extra guard needed).
            parts.append(f"--{key}={value}")
    return " ".join(parts)
# Turn the config dicts into CLI strings and run both finetune scripts
# from finetune_dir (the scripts live there).
merge_metadata_args = generate_args(metadata_config)
prepare_buckets_args = generate_args(bucketing_config)
merge_metadata_command = f"python merge_all_to_metadata.py {merge_metadata_args}"
prepare_buckets_command = f"python prepare_buckets_latents.py {prepare_buckets_args}"
os.chdir(finetune_dir)
# IPython "!" magic: run the command in a subshell.
!{merge_metadata_command}
time.sleep(1)
!{prepare_buckets_command}
Hello! I have a question/problem: how can I make sure that resizing (image resizing, presumably) does not reduce the longer side of the image to 1024 pixels, but instead maintains specific ratios such as 768x1344? I tried making `bucket_resolution` higher (up to 2048), but it doesn't help.