xinsir6 / ControlNetPlus

ControlNet++: All-in-one ControlNet for image generations and editing!
Apache License 2.0

Can't get promax outpainting to work #69

Open xiankgx opened 1 week ago

xiankgx commented 1 week ago

I cloned this repo, cd'd into the promax directory, and ran the controlnet_union_test_outpainting.py script.

With your original script unchanged, outpainting seems to work. Here are the original image resized to the generation size, the mask, the controlnet image, and the generated image.

[attached images: original_img, mask, controlnet_img, generated]

xiankgx commented 1 week ago

Then I tried to modify the code to do actual outpainting by adding padding, and this is what I get. It didn't generate anything in the masked areas and instead kept the original values from the input image. I attribute this to the strength=0.9999 parameter, which somehow leaks the original pixels in the masked area into the generated image.

[attached images: original_img1, mask1, controlnet_img1, generated1]
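
For reference, here is a minimal sketch (illustrative only, not the pipeline's actual code) of how diffusers-style inpaint pipelines typically derive the denoising schedule from strength. With strength < 1.0 the initial latents are built from the noised input image rather than pure noise, which would explain the leak:

num_inference_steps = 30
for strength in (0.9999, 1.0):
    # strength decides how much of the schedule is kept (get_timesteps-style logic)
    init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
    t_start = max(num_inference_steps - init_timestep, 0)
    pure_noise_init = strength >= 1.0  # only at exactly 1.0 do latents start as pure noise
    print(f"strength={strength}: {num_inference_steps - t_start} steps, pure-noise init: {pure_noise_init}")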

xiankgx commented 1 week ago

Changing the strength to 1.0 makes things kinda work, but the masked areas are not coherent with the unmasked areas.

[attached images: original_img2, mask2, controlnet_img2, generated2]

xiankgx commented 1 week ago

Here is my source code. Do you see what could be wrong? @xinsir6

# Test ControlNet with diffusers
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import sys
sys.path.append('..')
import cv2
import copy
import torch
import random
import numpy as np
from PIL import Image
from mask import get_mask_generator
from diffusers.utils import load_image
from diffusers import EulerAncestralDiscreteScheduler, AutoencoderKL
from models.controlnet_union import ControlNetModel_Union
from pipeline.pipeline_controlnet_union_inpaint_sd_xl import StableDiffusionXLControlNetUnionInpaintPipeline

device=torch.device('cuda:0')

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
# Note: you need to point the model and the config to the promax version manually; the default is not the promax version.
from huggingface_hub import snapshot_download
snapshot_download(repo_id="xinsir/controlnet-union-sdxl-1.0", local_dir='controlnet-union-sdxl-1.0')
# You should make a new dir controlnet-union-sdxl-1.0-promax, move the promax config and promax model into it, and rename them to the default file names.
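# A possible way to do that (hypothetical sketch, not from the repo; adjust the
# file names below if they differ in your snapshot):
# import shutil
# os.makedirs("controlnet-union-sdxl-1.0-promax", exist_ok=True)
# shutil.copy("controlnet-union-sdxl-1.0/config_promax.json",
#             "controlnet-union-sdxl-1.0-promax/config.json")
# shutil.copy("controlnet-union-sdxl-1.0/diffusion_pytorch_model_promax.safetensors",
#             "controlnet-union-sdxl-1.0-promax/diffusion_pytorch_model.safetensors")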
controlnet_model = ControlNetModel_Union.from_pretrained("./controlnet-union-sdxl-1.0-promax", torch_dtype=torch.float16, use_safetensors=True)

pipe = StableDiffusionXLControlNetUnionInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet_model, 
    vae=vae,
    torch_dtype=torch.float16,
    # scheduler=ddim_scheduler,
    scheduler=eulera_scheduler,
)

pipe = pipe.to(device)

def HWC3(x):
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    H, W, C = x.shape
    assert C == 1 or C == 3 or C == 4
    if C == 3:
        return x
    if C == 1:
        return np.concatenate([x, x, x], axis=2)
    if C == 4:
        color = x[:, :, 0:3].astype(np.float32)
        alpha = x[:, :, 3:4].astype(np.float32) / 255.0
        y = color * alpha + 255.0 * (1.0 - alpha)
        y = y.clip(0, 255).astype(np.uint8)
        return y

mask_gen_kwargs = {
    "min_padding_percent": 0.06,
    "max_padding_percent": 0.30,
    "left_padding_prob": 0.5,
    "top_padding_prob": 0.5,
    "right_padding_prob": 0.5,
    "bottom_padding_prob": 0.5,
}

mask_gen = get_mask_generator(kind='outpainting', kwargs=mask_gen_kwargs)

prompt = "Couple, walking and mountain travel for holiday, adventure and happy journey for bonding in nature. Outdoor, people and honeymoon vacation or date together, explore and love in jungle or wilderness"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

seed = random.randint(0, 2147483647)

# The original image you want to repaint.

import io
import requests

def load_image(path):
    # Overrides diffusers.utils.load_image imported above and returns a BGR
    # numpy array, mirroring cv2.imread.
    if path.startswith("http"):
        req = requests.get(path)
        image = Image.open(io.BytesIO(req.content)).convert("RGB")
    else:
        image = Image.open(path).convert("RGB")
    return np.array(image)[:, :, ::-1]  # RGB -> BGR

# original_img = cv2.imread("your image path")
original_img = load_image("https://us.123rf.com/450wm/peopleimages12/peopleimages122405/peopleimages12240520087/229968872-couple-walking-and-mountain-travel-for-holiday-adventure-and-happy-journey-for-bonding-in-nature-out.jpg?ver=6")
# Note that outpainting currently only supports rectangular outpainting, i.e. the mask border lines should be parallel to the image boundary.
# mask = cv2.imread("your mask image path") 

original_code = False

if original_code:
    height, width, _  = original_img.shape
    ratio = np.sqrt(1024. * 1024. / (width * height))
    W, H = int(width * ratio) // 8 * 8, int(height * ratio) // 8 * 8
    original_img = cv2.resize(original_img, (W, H))
    original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)

    import copy
    controlnet_img = copy.deepcopy(original_img)
    controlnet_img = np.transpose(controlnet_img, (2, 0, 1))
    mask = mask_gen(controlnet_img)
    controlnet_img = np.transpose(controlnet_img, (1, 2, 0))
    mask = np.transpose(mask, (1, 2, 0))

    controlnet_img[mask.squeeze() > 0.0] = 0
    mask = HWC3((mask * 255).astype('uint8'))

    controlnet_img = Image.fromarray(controlnet_img)
    original_img = Image.fromarray(original_img)
    mask = Image.fromarray(mask)
else:
    def create_controlnet_outpaint_inputs(ori_image: Image.Image, l: float = 0, r: float = 0, t: float = 0, b: float = 0):
        # Pads the image by the given fractions of its width/height and returns
        # (init image padded with white, controlnet image padded with black,
        #  mask that is white over the padded border).
        ori_image_np = np.array(ori_image)

        h, w = ori_image_np.shape[:2]
        l = int(l * w)
        r = int(r * w)
        t = int(t * h)
        b = int(b * h)

        mask_np = np.zeros((h, w), dtype=np.uint8)

        padded_ori_image_np = np.pad(ori_image_np, [(t, b), (l, r), (0, 0)], "constant", constant_values=0)
        padded_ori_image_np2 = np.pad(ori_image_np, [(t, b), (l, r), (0, 0)], "constant", constant_values=255)
        padded_mask_np = np.pad(mask_np, [(t, b), (l, r)], "constant", constant_values=255)

        return Image.fromarray(padded_ori_image_np2), Image.fromarray(padded_ori_image_np), Image.fromarray(padded_mask_np).convert("RGB")

    original_img = Image.fromarray(original_img[:, :, ::-1])  # BGR -> RGB
    original_img, controlnet_img, mask = create_controlnet_outpaint_inputs(
        original_img,
        0.2, 0.3, 0.2, 0.3,  # l, r, t, b padding fractions
    )
    width, height = controlnet_img.size
    ratio = np.sqrt(1024. * 1024. / (width * height))
    W, H = int(width * ratio) // 8 * 8, int(height * ratio) // 8 * 8

    original_img = original_img.resize((W, H))
    controlnet_img = controlnet_img.resize((W, H))
    mask = mask.resize((W, H))
    print(f"H: {H}, W: {W}")

width, height = W, H

# 0 -- openpose
# 1 -- depth
# 2 -- hed/pidi/scribble/ted
# 3 -- canny/lineart/anime_lineart/mlsd
# 4 -- normal
# 5 -- segment
# 6 -- tile
# 7 -- repaint

original_img.save("original_img.jpg")
controlnet_img.save("controlnet_img.jpg")
mask.save("mask.png")

images = pipe(prompt=[prompt]*1,
            image=original_img,
            mask_image=mask,
            control_image_list=[0, 0, 0, 0, 0, 0, 0, controlnet_img], 
            negative_prompt=[negative_prompt]*1,
            # generator=generator,
            width=width, 
            height=height,
            num_inference_steps=30,
            strength=1.0,
            union_control=True,
            union_control_type=torch.Tensor([0, 0, 0, 0, 0, 0, 0, 1]),
            ).images

images = Image.fromarray(np.concatenate(list(map(np.array, images)), axis=1))
images.save("generated.jpg")
xiankgx commented 1 week ago

Looking at the original/input image and at the strength=0.9999 in the pipeline call in the original source code, it seems that outpainting worked because it was taking hints from the original pixels in the masked areas. While this works when trying to replace a masked area with something very similar (inpainting), it does not work for generating new content when outpainting.

What do you think?
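
A quick way to test this hypothesis (a rough sketch reusing original_img and mask from the script above, not code from the repo) would be to blank out the region under the mask in the init image as well, so that strength=0.9999 has no real content left to leak, and then check whether the original script still produces a plausible result:

import numpy as np
from PIL import Image

init_np = np.array(original_img)             # original_img / mask: PIL images from the script above
repaint = np.array(mask.convert("L")) > 127  # True where content should be generated
init_np[repaint] = 127                       # flat gray instead of the real hidden content
blanked_init = Image.fromarray(init_np)
# then call pipe(..., image=blanked_init, ...) with the same settings and strength=0.9999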