s9roll7 / animatediff-cli-prompt-travel

animatediff prompt travel
Apache License 2.0

SDXL issue #216

Open · JojoYay opened this issue 6 months ago

JojoYay commented 6 months ago

I think it worked previously, but these days when I try an SDXL model with an SDXL motion module and VAE, it won't work anymore. Here is my prompt.json. For SDXL, I just set up the SDXL model, VAE, and motion module (see the sketch of those three fields right after the JSON below); everything else remains false.

{
    "name": "20240104_0856",
    "path": "sd_models/sd/majicmixRealistic_betterV2V25.safetensors",
    "vae_path": "vae/sd/vae-ft-mse-840000-ema-pruned.safetensors",
    "motion_module": "motion_modules/sd/mm_sd_v15_v2.safetensors",
    "context_schedule": "uniform",
    "lcm_map": {
        "enable": false,
        "start_scale": 0.15,
        "end_scale": 0.75,
        "gradient_start": 0.2,
        "gradient_end": 0.75
    },
    "gradual_latent_hires_fix_map": {
        "enable": false,
        "scale": {
            "0": 0.5,
            "0.7": 1.0
        },
        "reverse_steps": 5,
        "noise_add_count": 3
    },
    "compile": false,
    "tensor_interpolation_slerp": true,
    "seed": [
        3663073061474465759
    ],
    "scheduler": "k_dpmpp_sde",
    "steps": 8,
    "guidance_scale": 1.5,
    "unet_batch_size": 1,
    "clip_skip": 2,
    "prompt_fixed_ratio": 0.5,
    "head_prompt": "best quality, masterpiece, (photorealistic:1.4), 4k, real skin texture, 1girl, 20 years old,1girl, big breasts, summer, beach,  (bikini :1.4), explorer, adventure, colorful lighting, photo generic, grin, looking at viewer, (daytime: 1.4), pink hair",
    "prompt_map": {
        "0": "best quality"
    },
    "tail_prompt": "",
    "n_prompt": [
        "bad_prompt, easynegative, FastNegativeV2, negative_hand-neg, ng_deepnegative_v1_75t,(worst quality:2), (low quality:2), (normal quality:2), lowres, watermark, monochrome, nsfw, blurry, blurry background, helmet, cap, hair accesary, (night),"
    ],
    "is_single_prompt_mode": false,
    "lora_map": {
        "lora/sd/mak2.safetensors": {
            "region": [
                "0"
            ],
            "scale": {
                "0": 0.3
            }
        },
        "lora/sd/irene_v70.safetensors": {
            "region": [
                "0"
            ],
            "scale": {
                "0": 0.15
            }
        },
        "lora/sd/koreanDollLikeness_v20.safetensors": {
            "region": [
                "0"
            ],
            "scale": {
                "0": 1
            }
        }
    },
    "motion_lora_map": {
        "motion_lora/v2_lora_ZoomOut.ckpt": 0.8
    },
    "ip_adapter_map": {
        "enable": true,
        "input_image_dir": "../stylize/dance00029/00_ipadapter",
        "prompt_fixed_ratio": 0.9,
        "save_input_image": false,
        "resized_to_square": false,
        "scale": 1,
        "is_full_face": false,
        "is_plus_face": false,
        "is_plus": false,
        "is_light": true
    },
    "img2img_map": {
        "enable": false,
        "init_img_dir": "../stylize/dance00029/00_img2img",
        "save_init_image": false,
        "denoising_strength": 0.7
    },
    "region_map": {},
    "controlnet_map": {
        "input_image_dir": "../stylize/dance00029/fg_00_dance00029/2024-01-04_08-58_99/00_controlnet_image",
        "max_samples_on_vram": 0,
        "max_models_on_vram": 0,
        "save_detectmap": true,
        "preprocess_on_gpu": true,
        "is_loop": false,
        "controlnet_tile": {
            "enable": true,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 0.25,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "animatediff_controlnet": {
            "enable": true,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 0.1,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_ip2p": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 0.5,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": [],
            "control_region_list": []
        },
        "controlnet_lineart_anime": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 0.3,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_openpose": {
            "enable": true,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": [],
            "control_region_list": []
        },
        "controlnet_softedge": {
            "enable": false,
            "use_preprocessor": true,
            "preprocessor": {
                "type": "softedge_pidsafe",
                "param": {}
            },
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_shuffle": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_depth": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 0.5,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_canny": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_inpaint": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_lineart": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 0.5,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_mlsd": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_normalbae": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_scribble": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_seg": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "qr_code_monster_v1": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "qr_code_monster_v2": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 1.0,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_mediapipe_face": {
            "enable": false,
            "use_preprocessor": true,
            "guess_mode": false,
            "controlnet_conditioning_scale": 0.5,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0,
            "control_scale_list": []
        },
        "controlnet_ref": {
            "enable": false,
            "ref_image": "../../../animatediff/animatediff-cli-prompt-travel/data/ip_adapter_image/irene/0.png",
            "attention_auto_machine_weight": 0.5,
            "gn_auto_machine_weight": 0.5,
            "style_fidelity": 0.5,
            "reference_attn": true,
            "reference_adain": false,
            "scale_pattern": [
                1.0
            ]
        }
    },
    "upscale_config": {
        "scheduler": "euler_a",
        "steps": 20,
        "strength": 0.5,
        "guidance_scale": 10,
        "controlnet_tile": {
            "enable": true,
            "controlnet_conditioning_scale": 1.0,
            "guess_mode": false,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0
        },
        "controlnet_line_anime": {
            "enable": false,
            "controlnet_conditioning_scale": 1.0,
            "guess_mode": false,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0
        },
        "controlnet_ip2p": {
            "enable": true,
            "controlnet_conditioning_scale": 0.5,
            "guess_mode": false,
            "control_guidance_start": 0.0,
            "control_guidance_end": 1.0
        },
        "controlnet_ref": {
            "enable": false,
            "use_frame_as_ref_image": false,
            "use_1st_frame_as_ref_image": false,
            "ref_image": "ref_image/path_to_your_ref_img.jpg",
            "attention_auto_machine_weight": 1.0,
            "gn_auto_machine_weight": 1.0,
            "style_fidelity": 0.25,
            "reference_attn": true,
            "reference_adain": false
        }
    },
    "stylize_config": {
        "original_video": {
            "path": "data/video/dance00029.mp4",
            "aspect_ratio": -1,
            "offset": 0
        },
        "create_mask": [
            "person"
        ],
        "composite": {
            "fg_list": [
                {
                    "path": " absolute path to frame dir ",
                    "mask_path": " absolute path to mask dir (this is optional) ",
                    "mask_prompt": "person"
                }
            ],
            "bg_frame_dir": "Absolute path to the BG frame directory",
            "hint": ""
        },
        "0": {
            "width": 512,
            "height": 904,
            "length": 44,
            "context": 16,
            "overlap": 4,
            "stride": 0
        }
    },
    "output": {
        "format": "mp4",
        "fps": 32,
        "encode_param": {
            "crf": 10
        }
    },
    "result": {}
}
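
For reference, the JSON above still points at SD 1.5 assets (the majicmixRealistic checkpoint, the vae-ft-mse-840000 VAE, and the mm_sd_v15_v2 motion module). Per the description at the top, the SDXL run swaps only those three paths for SDXL counterparts while everything else stays false. The fragment below is a hedged sketch of just those fields; the file names and the sdxl/ subdirectories are purely hypothetical placeholders, not paths from this setup:

{
    "path": "sd_models/sdxl/your_sdxl_checkpoint.safetensors",
    "vae_path": "vae/sdxl/your_sdxl_vae.safetensors",
    "motion_module": "motion_modules/sdxl/your_sdxl_motion_module.safetensors"
}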

This is the error log:

Generating 1 animations
Running generation 1 of 1
Generation seed: 3313706524245448039
len( region_condi_list )=1
len( region_list )=1
apply_lcm_lora=False
multi_uncond_mode=False
do_classifier_free_guidance=True
condi_size=2
  0%| | 0/8 [00:00<?, ?steps/s]Forward upsample size to force interpolation output size.
  0%| | 0/8 [00:02<?, ?steps/s]
Input and output must have the same number of spatial dimensions, but got input with spatial dimensions of [29, 16] and output size of torch.Size([16, 57, 32]). Please provide input tensor in (N, C, d1, d2, ...,dK) format and output size in (o1, o2, ...,oK) format.
Traceback (most recent call last):
  File "/storage/aj/animatediff-cli-prompt-travel/src/animatediff/front.py", line 233, in execute_impl
    generate(stylize_dir=stylize_dir, length=16)
  File "/storage/aj/animatediff-cli-prompt-travel/src/animatediff/stylize.py", line 618, in generate
    output_0_dir = generate(
  File "/storage/aj/animatediff-cli-prompt-travel/src/animatediff/cli.py", line 445, in generate
    output = run_inference(
  File "/storage/aj/animatediff-cli-prompt-travel/src/animatediff/generate.py", line 1543, in run_inference
    pipeline_output = pipeline(
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/storage/aj/animatediff-cli-prompt-travel/src/animatediff/pipelines/sdxl_animation.py", line 1937, in __call__
    pred_layer = self.unet(
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/accelerate/hooks.py", line 165, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/storage/aj/animatediff-cli-prompt-travel/src/animatediff/sdxl_models/unet.py", line 1108, in forward
    sample = upsample_block(
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/storage/aj/animatediff-cli-prompt-travel/src/animatediff/sdxl_models/unet_blocks.py", line 939, in forward
    hidden_states = upsampler(hidden_states, upsample_size)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/diffusers/models/resnet.py", line 189, in forward
    hidden_states = F.interpolate(hidden_states, size=output_size, mode="nearest")
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py", line 3916, in interpolate
    raise ValueError(
ValueError: Input and output must have the same number of spatial dimensions, but got input with spatial dimensions of [29, 16] and output size of torch.Size([16, 57, 32]). Please provide input tensor in (N, C, d1, d2, ...,dK) format and output size in (o1, o2, ...,oK) format.

I would like to know what this error means. Is there anything I missed in the setup or execution? I tried with tensor_interpolation_slerp set to both true and false, but it didn't solve the issue.
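
For context, the final ValueError is raised by torch.nn.functional.interpolate in the upsampler from diffusers/models/resnet.py shown in the traceback: it receives a 4-D tensor with two spatial dimensions (29 x 16) but is asked for a 3-element output size (16, 57, 32), which suggests the frame/context axis (16) has ended up in the requested spatial size alongside height and width. The snippet below is a minimal, purely illustrative sketch (not the repo's code) that reproduces the same class of error using the shapes from the traceback:

import torch
import torch.nn.functional as F

# 4-D hidden states: (batch, channels, height, width) -> two spatial dims, 29 x 16
hidden_states = torch.randn(1, 320, 29, 16)

# Requesting a 3-element target size (16, 57, 32) trips the same shape check
# that fails inside F.interpolate in the traceback above.
try:
    F.interpolate(hidden_states, size=(16, 57, 32), mode="nearest")
except ValueError as err:
    print(err)

Since the mismatch happens inside the UNet forward pass, it would also be consistent with toggling tensor_interpolation_slerp having no effect on the error.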

Thank you.

jagan607 commented 2 months ago

Were you able to resolve this?

Thompshon commented 1 month ago

I have the same error.