modelscope / scepter

SCEPTER is an open-source framework used for training, fine-tuning, and inference with generative models.
https://github.com/modelscope/scepter
Apache License 2.0
413 stars 22 forks source link

When will the full LAR project version be released? #57

Open mycfhs opened 1 hour ago

mycfhs commented 1 hour ago

你好,我在使用LAR生成图像的时候,结果会比较抽象,尤其是真实图像的结果与论文相差较多。此外,何时会补全Refiner部分的模型代码呢? 下面是我用于推理(infer)的代码:

from torchvision.utils import save_image
from scepter.modules.utils.config import Config
from scepter.modules.utils.file_system import FS
from scepter.modules.utils.logger import get_logger
from scepter.modules.inference.largen_inference import LargenInference

from scepter.studio.inference.inference_ui.largen_ui import LargenUI

from PIL import Image
import numpy as np

# Size passed to the preprocessing step below.
output_height, output_width = 1024, 1024

# Target ("scene") image and mask, and reference ("subject") image and mask.
# Images are loaded as RGB, masks as single-channel 8-bit ("L").
tar_image = Image.open("asset/images/inpainting_text_ref/ex4_scene_im.jpg").convert("RGB")
tar_mask = Image.open("asset/images/inpainting_text_ref/ex4_scene_mask.jpg").convert("L")
ref_image = Image.open("asset/images/inpainting_text_ref/ex4_subject_im.jpg").convert("RGB")
ref_mask = Image.open("asset/images/inpainting_text_ref/ex4_subject_mask.jpg").convert("L")

# Convert to arrays and binarize the masks: values above 128 become 1, the rest 0.
ref_image = np.asarray(ref_image)
ref_mask = np.asarray(ref_mask)
ref_mask = np.where(ref_mask > 128, 1, 0).astype(np.uint8)

tar_image = np.asarray(tar_image)
tar_mask = np.asarray(tar_mask)
tar_mask = np.where(tar_mask > 128, 1, 0).astype(np.uint8)

# The method is called on the class itself, so the leading None presumably
# stands in for `self` -- TODO confirm against LargenUI.
# NOTE(review): the meaning of the `True` and `1.3` arguments is not visible
# here; check the signature of data_preprocess_inpaint before changing them.
data = LargenUI.data_preprocess_inpaint(
    None, tar_image, tar_mask, ref_image, ref_mask, True, 1.3, output_height, output_width
)

# Register the file-system clients (ModelScope and HTTP) before loading the model.
# An earlier version of this setup used 'cache/data' as TEMP_DIR. The directory
# name changed in the newer release: the UI stores its downloads under
# 'cache/cache_data' (configured in scepter_ui.yaml), so the same directory is
# reused here to avoid downloading the files again.
FS.init_fs_client(
    Config(load=False, cfg_dict={"NAME": "ModelscopeFs", "TEMP_DIR": "cache/cache_data"})
)
FS.init_fs_client(
    Config(load=False, cfg_dict={"NAME": "HttpFs", "TEMP_DIR": "cache/cache_data"})
)

# Build the inference object from its YAML configuration.
logger = get_logger(name='scepter')
cfg = Config(cfg_file='scepter/methods/studio/inference/largen/largen_pro.yaml')
largen_infer = LargenInference(logger)
largen_infer.init_from_cfg(cfg)

# The request is assembled from three groups of settings; the merged dict keeps
# the keys in the order listed here.
conditioning = {
    "image": None,
    "original_size_as_tuple": [1024, 1024],
    "target_size_as_tuple": [1024, 1024],
    "aesthetic_score": 6.0,
    "negative_aesthetic_score": 2.5,
    # Alternative prompt that was tried: "a photo of a backpack"
    "prompt": "a backpack",
    "negative_prompt": "",
    "prompt_prefix": "",
    "crop_coords_top_left": [0, 0],
}
base_sampling = {
    "sample": "ddim",
    "sample_steps": 50,
    "guide_scale": 7.5,
    "guide_rescale": 0,
    "discretization": "trailing",
}
# Settings carrying the "refine_" prefix.
refine_sampling = {
    "refine_sample": "ddim",
    "refine_guide_scale": 7.5,
    "refine_guide_rescale": 0.5,
    "refine_discretization": "trailing",
}
input_config = {**conditioning, **base_sampling, **refine_sampling}

# Keyword names for the preprocessed items, listed in the same order as the
# entries of `data` (index 0 through 8).
largen_field_names = (
    "largen_tar_image",
    "largen_tar_mask",
    "largen_masked_image",
    "largen_ref_image",
    "largen_ref_mask",
    "largen_ref_clip",
    "largen_base_image",
    "largen_extra_sizes",
    "largen_bbox_yyxx",
)
largen_inputs = {
    field: data[position] for position, field in enumerate(largen_field_names)
}

# Run inference and write the resulting images to disk.
output = largen_infer(
    input=input_config,
    num_samples=1,
    intermediate_callback=None,
    refine_strength=0,
    cat_uc=True,
    largen_state=True,
    largen_task="Text_Subject_Guided_Inpainting",
    largen_image_scale=1,
    **largen_inputs,
)

save_image(output["images"], "test.png")
mycfhs commented 1 hour ago

1619065+a photo of a  yellow duck toy +42 1664819+a photo of a  yellow duck toy +42 9592+a photo of a  yellow duck toy +42 会出现这种大片白色的抽象情况,或者质量很差。但是对于给的example图像组,结果就比较正常 6fce2010-8911-452b-8a4b-3605d8906a91