LeslieZhoa / GFPGAN-1024

GFPGAN 1024
MIT License
42 stars 8 forks source link

Loss exploded on stage 2 #5

Closed luser350 closed 2 weeks ago

luser350 commented 6 months ago

Hi, I have been trying to train ffhqr at 1024 x 1024 resolution. Here are the changes in my config file. I am using ffhq as low-quality images.

class Params:
    """Plain container of configuration values, set as instance attributes."""

    def __init__(self):
        """Assign every configuration value; takes no arguments."""

        # Run identifier and mode selector (both plain strings).
        self.name = 'GFPGAN'
        self.mode = 'encoder'
        # Checkpoint path; the 'decoder' directory in the path suggests it was
        # produced by an earlier decoder run -- TODO confirm.
        self.pretrain_path = 'GFPGAN-1024/checkpoint/GFPGAN/decoder/001-00027000.pth'
        # Weight files under pretrained_models/. Judging by the file names these
        # are presumably the GFPGAN generator, the main discriminator and the
        # left-eye / right-eye / mouth discriminators -- verify against the loader.
        self.scratch_gan_path = 'pretrained_models/GFPGANv1.4.pth'
        self.scratch_d_path = 'pretrained_models/GFPGANv1_net_d.pth'
        self.scratch_left_eye_path = 'pretrained_models/GFPGANv1_net_d_left_eye.pth'
        self.scratch_right_eye_path = 'pretrained_models/GFPGANv1_net_d_right_eye.pth'
        self.scratch_mouth_path = 'pretrained_models/GFPGANv1_net_d_mouth.pth'
        # Presumably the identity (ArcFace) model weights -- TODO confirm.
        self.id_model = 'pretrained_models/arcface_resnet18.pth'
        # Dataset roots. img_root and train_hq_root point at the same directory.
        # Presumably "hq"/"lq" mean high-/low-quality images: ffhqr is used as
        # the hq set and ffhq as the lq set -- confirm against the data loader.
        self.img_root = "output/train/ffhqr" 
        self.train_hq_root = "output/train/ffhqr" 
        self.train_lq_root = 'output/train/ffhq'
        # Both "lmk" bases are left as empty strings here.
        self.train_lmk_base = '' # lmk info (presumably landmark data -- TODO confirm)
        self.val_lmk_base = ''
        # Validation roots mirror the training split: ffhq as lq, ffhqr as hq.
        self.val_lq_root = 'output/val/ffhq'
        self.val_hq_root = 'output/val/ffhqr'
        # Presumably the generator learning rate -- TODO confirm where it is read.
        self.g_lr = 1e-3

I have commented out lines 210 to 228 in dataloader/GFPLoader.py: since I provide the low-quality images myself, I do not need to generate them.

# ------------------------ generate lq image ------------------------ #
        # blur
        '''
        if not self.eval:
            kernel = degradations.random_mixed_kernels(
                self.kernel_list,
                self.kernel_prob,
                self.blur_kernel_size,
                self.blur_sigma,
                self.blur_sigma, [-math.pi, math.pi],
                noise_range=None)

            img_lq = cv2.filter2D(img_lq, -1, kernel)
            # downsample
            scale = np.random.uniform(self.downsample_range[0], self.downsample_range[1])
            img_lq = cv2.resize(img_lq, (int(w // scale), int(h // scale)), interpolation=cv2.INTER_LINEAR)
            # # noise
            if self.noise_range is not None:
                img_lq = degradations.random_add_gaussian_noise(img_lq, self.noise_range)
            # jpeg compression
            if self.jpeg_range is not None:
                img_lq = degradations.random_add_jpg_compression(img_lq, self.jpeg_range)

        '''
        # resize to original size
        img_lq = cv2.resize(img_lq, (512, 512), interpolation=cv2.INTER_LINEAR)
        img_hq = cv2.resize(img_hq, (1024, 1024))

Why is img_lq resized to 512x512? I want to train on ffhq at 1024x1024 to produce ffhqr at 1024x1024.