mainsplainer closed this issue 1 year ago
Currently having issues with this myself on a 1660 Ti 6 GB. Let me know what info would be useful to post. For a start, I'm running Ubuntu 22.04, NVIDIA driver 530.41.03, CUDA 12.1. I deleted my old kohya GUI folder yesterday and did a fresh pull from the repo, with no luck. I'm able to generate images in a1111 just fine. Here's my config:
{
  "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5",
  "v2": false,
  "v_parameterization": false,
  "logging_dir": "/home/u1/Desktop/kohya_ss/sets/goku/log",
  "train_data_dir": "/home/u1/Desktop/kohya_ss/sets/goku/img",
  "reg_data_dir": "/home/u1/Desktop/kohya_ss/sets/goku/reg_images",
  "output_dir": "/home/u1/Desktop/kohya_ss/sets/goku/model",
  "max_resolution": "512,512",
  "learning_rate": "0.00001",
  "lr_scheduler": "constant",
  "lr_warmup": "0",
  "train_batch_size": 1,
  "epoch": 10,
  "save_every_n_epochs": 1,
  "mixed_precision": "fp16",
  "save_precision": "fp16",
  "seed": "1234",
  "num_cpu_threads_per_process": 2,
  "cache_latents": true,
  "caption_extension": ".txt",
  "enable_bucket": true,
  "gradient_checkpointing": true,
  "full_fp16": false,
  "no_token_padding": false,
  "stop_text_encoder_training": 0,
  "xformers": false,
  "save_model_as": "safetensors",
  "shuffle_caption": false,
  "save_state": false,
  "resume": "",
  "prior_loss_weight": 1.0,
  "text_encoder_lr": "5e-5",
  "unet_lr": "0.00001",
  "network_dim": 64,
  "lora_network_weights": "",
  "color_aug": false,
  "flip_aug": false,
  "clip_skip": 2,
  "gradient_accumulation_steps": 1.0,
  "mem_eff_attn": true,
  "output_name": "gokudb",
  "model_list": "runwayml/stable-diffusion-v1-5",
  "max_token_length": "75",
  "max_train_epochs": "",
  "max_data_loader_n_workers": "1",
  "network_alpha": 64,
  "training_comment": "",
  "keep_tokens": "0",
  "lr_scheduler_num_cycles": "",
  "lr_scheduler_power": "",
  "persistent_data_loader_workers": true,
  "bucket_no_upscale": true,
  "random_crop": false,
  "bucket_reso_steps": 64.0,
  "caption_dropout_every_n_epochs": 0.0,
  "caption_dropout_rate": 0,
  "optimizer": "AdamW",
  "optimizer_args": "",
  "noise_offset": "",
  "LoRA_type": "Standard",
  "conv_dim": 1,
  "conv_alpha": 1,
  "sample_every_n_steps": 8,
  "sample_every_n_epochs": 0,
  "sample_sampler": "euler_a",
  "sample_prompts": "high quality, anime screenshot, goku, dragonball --n worst quality, low quality, monochrome, greyscale, (muted color, pale color:1.3) --w 512 --h 512 --d 1 --l 7.5 --s 11",
  "additional_parameters": "",
  "vae_batch_size": 0,
  "min_snr_gamma": 0,
  "down_lr_weight": "",
  "mid_lr_weight": "",
  "up_lr_weight": "",
  "block_lr_zero_threshold": "",
  "block_dims": "",
  "block_alphas": "",
  "conv_dims": "",
  "conv_alphas": "",
  "weighted_captions": false,
  "unit": 1,
  "save_every_n_steps": 0,
  "save_last_n_steps": 0,
  "save_last_n_steps_state": 0
}
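In case it helps with debugging, here's a quick standalone fp16 sanity check I can run first (my own diagnostic sketch, nothing kohya-specific): do a half-precision matmul and a small conv under autocast and see whether NaNs appear.

import torch

# Standalone fp16 sanity check (hypothetical diagnostic, not part of
# kohya_ss). NaN/Inf here would point at the card's half-precision path.
print(torch.cuda.get_device_name(0))

a = torch.randn(256, 256, device="cuda")
b = torch.randn(256, 256, device="cuda")
conv = torch.nn.Conv2d(3, 8, 3, padding=1).cuda()
img = torch.randn(1, 3, 64, 64, device="cuda")

with torch.autocast(device_type="cuda", dtype=torch.float16):
    m = a @ b
    c = conv(img)

for name, t in (("matmul", m), ("conv", c)):
    bad = torch.isnan(t).any() or torch.isinf(t).any()
    print(f"{name}: dtype={t.dtype}, NaN/Inf={bool(bad)}")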
Author of this PR here. I have no clue why it fixes that; you might also need to enable cuDNN. TU117 is a cursed mess.
The 16xx series cards do not seem to support fp16 by default (running Stable Diffusion in fp16 mode generated black images). This was fixed here: https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/4407
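Before that fix landed, the usual user-side workaround in webui was launching with --precision full --no-half, i.e. keeping the model in fp32. A minimal sketch of what that amounts to (my own illustration, not webui's actual code):

import copy
import torch

# What --no-half effectively does: keep weights/activations in fp32
# instead of casting to fp16 (the fp16 path misbehaves on 16xx cards).
base = torch.nn.Linear(16, 16).cuda()
model_fp16 = copy.deepcopy(base).half()   # default fast path
model_fp32 = copy.deepcopy(base).float()  # --no-half path: slower, but correct

x = torch.randn(1, 16, device="cuda")
print(model_fp16(x.half()).dtype)  # torch.float16
print(model_fp32(x).dtype)         # torch.float32

The training-side analogue would be setting "mixed_precision": "no" and "save_precision": "float" in the config above, at the cost of speed and VRAM; that's my suggestion, not something confirmed in this thread.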
Enabling cuDNN benchmark mode could potentially fix the loss=nan issue on these cards.
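For anyone who wants to try that with kohya, the toggle is two lines of PyTorch; this mirrors what the webui fix does as far as I understand it, and putting it near the top of the training script is my assumption:

import torch

# Suspected 16xx workaround: make sure cuDNN is active and let it
# benchmark/select convolution algorithms instead of the defaults.
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True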