mit-han-lab / anycost-gan

[CVPR 2021] Anycost GANs for Interactive Image Synthesis and Editing
https://hanlab.mit.edu/projects/anycost-gan/
MIT License

Wrong image generated when using config: stylegan2- #23

Closed: gongmm closed this issue 2 years ago

gongmm commented 2 years ago
import torch
import numpy as np
import os
from PIL import Image
from models.dynamic_channel import set_uniform_channel_ratio, reset_generator
import models

class FaceEditor:
    def __init__(self, config, device, anycost_resolution=1024, n_style_to_change=12):
        # load assets
        self.device = device
        self.anycost_channel = 1.0
        self.anycost_resolution = anycost_resolution
        self.n_style_to_change = n_style_to_change

        # build the generator
        self.generator = models.get_pretrained('generator', config).to(device)
        self.generator.eval()
        set_uniform_channel_ratio(self.generator, 0.5)  # run the generator at a 0.5 channel ratio (anycost sub-network)
        self.generator.target_res = anycost_resolution  # set resolution
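        # average style vector over 10,000 samples (commonly used for the
        # truncation trick; computed but not used in this snippet)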
        self.mean_latent = self.generator.mean_style(10000)

        # select only a subset of the directions to use
        '''
        possible keys:
        ['00_5_o_Clock_Shadow', '01_Arched_Eyebrows', '02_Attractive', '03_Bags_Under_Eyes', '04_Bald', '05_Bangs',
            '06_Big_Lips', '07_Big_Nose', '08_Black_Hair', '09_Blond_Hair', '10_Blurry', '11_Brown_Hair', '12_Bushy_Eyebrows',
            '13_Chubby', '14_Double_Chin', '15_Eyeglasses', '16_Goatee', '17_Gray_Hair', '18_Heavy_Makeup', '19_High_Cheekbones',
            '20_Male', '21_Mouth_Slightly_Open', '22_Mustache', '23_Narrow_Eyes', '24_No_Beard', '25_Oval_Face', '26_Pale_Skin',
            '27_Pointy_Nose', '28_Receding_Hairline', '29_Rosy_Cheeks', '30_Sideburns', '31_Smiling', '32_Straight_Hair',
            '33_Wavy_Hair', '34_Wearing_Earrings', '35_Wearing_Hat', '36_Wearing_Lipstick', '37_Wearing_Necklace',
            '38_Wearing_Necktie', '39_Young']
        '''

        direction_map = {
            'smiling': '31_Smiling',
            'young': '39_Young',
            'wavy hair': '33_Wavy_Hair',
            'gray hair': '17_Gray_Hair',
            'blonde hair': '09_Blond_Hair',
            'eyeglass': '15_Eyeglasses',
            'mustache': '22_Mustache',
        }
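        # note: direction_map above maps friendly names to boundary keys, but
        # later code indexes direction_dict by the full keys (e.g. '31_Smiling')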

        boundaries = models.get_pretrained('boundary', config)
        self.direction_dict = dict()
        for k, v in boundaries.items():
            self.direction_dict[k] = v.view(1, 1, -1)

    def get_latent_code(self, latent_code_path):
        latent_code = torch.from_numpy(np.load(latent_code_path)).view(1, -1, 512)
        return latent_code

    def get_direction_dict(self, attr_weights):
        final_dict = {}
        for key, value in attr_weights.items():
            if value == 0:
                continue
            final_dict[key] = value * self.direction_dict[key]
        return final_dict

    def get_boundary_dict(self):
        return self.direction_dict

    def generate_image(self, save_path, input_kwargs):
        def image_to_np(x):
            assert x.shape[0] == 1
            x = x.squeeze(0).permute(1, 2, 0)
            x = (x + 1) * 0.5  # 0-1
            x = (x * 255).cpu().numpy().astype('uint8')
            return x

        with torch.no_grad():
            out = self.generator(**input_kwargs)[0].clamp(-1, 1)
            out = image_to_np(out)
            out = np.ascontiguousarray(out)
            img_pil = Image.fromarray(out)
            img_pil.save(save_path)

    def edit(self, latent_code_path, attr_sliders, force_full_g=False):
        latent_code = torch.from_numpy(np.load(latent_code_path)).view(1, -1, 512).to(self.device)
        # input kwargs for the generator

        edited_code = latent_code.clone()
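        # shift only the first n_style_to_change style vectors along each
        # attribute boundary, scaled by the slider value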
        for direction_name in attr_sliders.keys():
            edited_code[:, :self.n_style_to_change] = (
                edited_code[:, :self.n_style_to_change]
                + attr_sliders[direction_name] * self.direction_dict[direction_name].to(self.device)
            )

        edited_code = edited_code.to(self.device)
        if not force_full_g:
            set_uniform_channel_ratio(self.generator, self.anycost_channel)
            self.generator.target_res = self.anycost_resolution
        return latent_code, edited_code

if __name__ == '__main__':
    gan_config = 'stylegan2-ffhq-config-f'
    fe = FaceEditor(config=gan_config, device='cuda:0')
    # load a projected style code and render it unchanged
    ori = fe.get_latent_code('demo.npy').to(fe.device)
    ori_kwargs = {'styles': ori, 'noise': None, 'randomize_noise': False, 'input_is_style': True}

    fe.generate_image(save_path='origin.png', input_kwargs=ori_kwargs)

The image generated with config anycost-ffhq-config-f is fine, but the image generated with config stylegan2-ffhq-config-f is wrong. How can I fix this bug? Thank you! (screenshot of the wrong output attached)

tonylins commented 2 years ago

Hi, could you please provide minimal reproducible code to help with debugging?

Note that AnycostGAN and StyleGAN may need different style codes. If you use a style code from AnycostGAN with StyleGAN, it may produce weird results.
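A minimal sketch of that mismatch, using only the calls already shown in this thread (demo.npy is the style code shared later in this issue): the same saved style code is rendered by both generators, and it will only look right with the generator it was projected for.

import torch
import numpy as np
import models

# the same saved style code, rendered by both generators
code = torch.from_numpy(np.load('demo.npy')).view(1, -1, 512).to('cuda:0')
kwargs = {'styles': code, 'noise': None, 'randomize_noise': False, 'input_is_style': True}

for config in ['anycost-ffhq-config-f', 'stylegan2-ffhq-config-f']:
    gen = models.get_pretrained('generator', config).to('cuda:0').eval()
    with torch.no_grad():
        out = gen(**kwargs)[0].clamp(-1, 1)
    # only the generator the code was projected for will render it correctly
    print(config, tuple(out.shape))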

gongmm commented 2 years ago

Thanks! When I used the style codes for StyleGAN, the bug was solved. But I found another problem when changing the output resolution. For StyleGAN, the output image is perfect at the default resolution (screenshots attached), but when I change the output resolution, e.g. self.generator.target_res = 256, the output image goes wrong (screenshot attached).

tonylins commented 2 years ago

Could you provide a code segment to reproduce the issue?

gongmm commented 2 years ago
import torch
import numpy as np
import os
from PIL import Image
import models

class FaceEditor:
    def __init__(self, config, device, anycost_resolution=1024, n_style_to_change=12):
        # load assets
        self.device = device
        self.anycost_channel = 1.0
        self.anycost_resolution = anycost_resolution
        self.n_style_to_change = n_style_to_change

        # build the generator
        self.generator = models.get_pretrained('generator', config).to(device)
        self.generator.eval()
        self.generator.target_res = anycost_resolution  # set resolution
        self.mean_latent = self.generator.mean_style(10000)

        # select only a subset of the directions to use

        direction_map = {
            'smiling': '31_Smiling',
            'young': '39_Young',
            'wavy hair': '33_Wavy_Hair',
            'gray hair': '17_Gray_Hair',
            'blonde hair': '09_Blond_Hair',
            'eyeglass': '15_Eyeglasses',
            'mustache': '22_Mustache',
        }

        boundaries = models.get_pretrained('boundary', config)
        self.direction_dict = dict()
        for k, v in boundaries.items():
            self.direction_dict[k] = v.view(1, 1, -1)

    def generate_image(self, save_path, input_kwargs):
        def image_to_np(x):
            assert x.shape[0] == 1
            x = x.squeeze(0).permute(1, 2, 0)
            x = (x + 1) * 0.5  # 0-1
            x = (x * 255).cpu().numpy().astype('uint8')
            return x

        with torch.no_grad():
            out = self.generator(**input_kwargs)[0].clamp(-1, 1)
            out = image_to_np(out)
            out = np.ascontiguousarray(out)
            img_pil = Image.fromarray(out)
            img_pil.save(save_path)

    def edit(self, latent_code_path, attr_sliders, force_full_g=False):
        latent_code = torch.from_numpy(np.load(latent_code_path)).view(1, -1, 512).to(self.device)
        # input kwargs for the generator

        edited_code = latent_code.clone()
        for direction_name in attr_sliders.keys():
            edited_code[:, :self.n_style_to_change] = (
                edited_code[:, :self.n_style_to_change]
                + attr_sliders[direction_name] * self.direction_dict[direction_name].to(self.device)
            )

        edited_code = edited_code.to(self.device)

        return latent_code, edited_code

if __name__ == '__main__':
    gan_config = 'stylegan2-ffhq-config-f'
    fe = FaceEditor(config=gan_config, anycost_resolution=512, device='cuda:0')
    # fe = FaceEditor(config=gan_config, device='cuda:0')

    attr_sliders = {
        '31_Smiling': 0.2
    }

    latent_path = "demo.npy"

    ori, edit = fe.edit(latent_code_path=latent_path, attr_sliders=attr_sliders)
    ori_kwargs = {'styles': ori, 'noise': None, 'randomize_noise': False, 'input_is_style': True}
    edit_kwargs = {'styles': edit, 'noise': None, 'randomize_noise': False, 'input_is_style': True}

    fe.generate_image(save_path="origin.png", input_kwargs=ori_kwargs)
    fe.generate_image(save_path="edit.png", input_kwargs=edit_kwargs)

demo.npy is here: https://drive.google.com/file/d/1cx_6epZjIH9uoADo744ySQQan15dL4Fa/view?usp=sharing. The attached screenshot shows the broken output; when changing anycost_resolution to 1024, the output image looks correct again (screenshot attached).

tonylins commented 2 years ago

Hi, sorry for the late reply. You are using the original StyleGAN2 instead of AnycostGAN.

You need to replace gan_config = 'stylegan2-ffhq-config-f' with gan_config = 'anycost-ffhq-config-f'.

I tested it and it works normally, so I will close the issue for now. But feel free to reopen if you encounter other problems.
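For reference, the fix is a one-line change in the repro script above. Only the anycost generator is trained to render at multiple resolutions, so an anycost_resolution below 1024 only makes sense with the anycost config:

# use the anycost generator, which supports multi-resolution output
gan_config = 'anycost-ffhq-config-f'  # instead of 'stylegan2-ffhq-config-f'
fe = FaceEditor(config=gan_config, anycost_resolution=512, device='cuda:0')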