weixiong-ur / mdgan

Official code of the CVPR'18 paper "Learning to Generate Time-Lapse Videos Using Multi-Stage Dynamic Generative Adversarial Networks"

Evaluation for Single Frames #4

Open mdorkenw opened 4 years ago

mdorkenw commented 4 years ago

Unfortunately, your inference code doesn't work for single-image evaluation, so I adapted your test.py to evaluate single frames as input (see below). Could you please check whether everything is correct? The generated videos look bad even for images from your test set.

from __future__ import print_function
import argparse
import os
import time

import torch
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
from PIL import ImageFile
from PIL import Image

from Dynamic_Model import MDGAN_S1_G, MDGAN_S2_G

ImageFile.LOAD_TRUNCATED_IMAGES = True
cudnn.benchmark = True

parser = argparse.ArgumentParser()
parser.add_argument('--netG_S2', 
    default='./netG_S2_067.pth', 
    help='path to netG Stage 2')
parser.add_argument('--netG_S1', 
    default='./netG_S1_030.pth', 
    help='path to netG Stage 1')
parser.add_argument('--cuda', action='store_true', default=False,
    help='enables cuda')
parser.add_argument('--outf', default='./results',
    help='output folder')
parser.add_argument('--test_img', required=True, help='path to the test image')
opt = parser.parse_args()

localtime = time.asctime(time.localtime(time.time()))
print('\n Run evaluation on single frame (%s)' % localtime)

cuda = opt.cuda
test_img = opt.test_img
output_path = opt.outf
if not os.path.exists(output_path):
    os.mkdir(output_path)

imageSize = 128
netG_S1 = MDGAN_S1_G(32)
netG_S2 = MDGAN_S2_G(32)

# Load models (map the checkpoints to CPU when CUDA is not used)
map_location = None if opt.cuda else 'cpu'
netG_S1.load_state_dict(torch.load(opt.netG_S1, map_location=map_location))
netG_S2.load_state_dict(torch.load(opt.netG_S2, map_location=map_location))

## Load image and apply transformation
transf_ = transforms.Compose([
    transforms.Resize((imageSize, imageSize)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5)),
])

val_gt = transf_(Image.open(test_img).convert('RGB')).unsqueeze(0)

if cuda:
    netG_S1.cuda()
    netG_S2.cuda()
    val_gt = val_gt.cuda()

netG_S1.train() ## WHY TRAIN not EVAL?
netG_S2.train()

val_video   = val_gt.unsqueeze(2).repeat(1, 1, 32, 1, 1)  # replicate the frame 32 times along the time axis
val_fake_s1 = netG_S1(Variable(val_video))
val_fake_s2 = netG_S2(val_fake_s1)                     # size: batchsize x 3 x 32 x H x W
val_fake_s1 = val_fake_s1.data.permute(2, 0, 1, 3, 4)  # permute to 32 x batchsize x 3 x H x W
val_fake_s2 = val_fake_s2.data.permute(2, 0, 1, 3, 4)

# save fake samples of stage 1
for t in range(val_fake_s1.size(0)):
    vutils.save_image(val_fake_s1[t],
        '%s/samples_s1_frame_%03d.png' % (opt.outf, t),
        normalize=True, nrow=8)
# save fake samples of stage 2
for t in range(val_fake_s2.size(0)):
    vutils.save_image(val_fake_s2[t],
        '%s/samples_s2_frame_%03d.png' % (opt.outf, t),
        normalize=True, nrow=8)

def generate_video(model='s1', outf= opt.outf):
    img_path = os.path.join(outf, 'samples_' + model +  '_frame_%03d.png')
    mp4_path = os.path.join(outf, model+ '_video.mp4')
    cmd = ('ffmpeg -loglevel warning -framerate 25 -i ' + img_path + 
        ' -qscale:v 2 -y ' + mp4_path )
    print(cmd)
    os.system(cmd)
generate_video('s1')
generate_video('s2')
weixiong-ur commented 4 years ago

Hi Donk10,

It may be caused by the batch norm layers, which are sensitive to the batch size. I will try to replicate the testing with a batch size of 1. In the meantime, can you use model.eval() instead of model.train() to perform the prediction?
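
For reference, a minimal sketch of that suggestion, reusing the netG_S1, netG_S2, and val_gt objects from the script above. The torch.no_grad() wrapper is an extra assumption (recent PyTorch versions) that replaces the Variable call and simply disables gradient tracking during inference:

# Put both generators in evaluation mode so that BatchNorm layers use their
# stored running statistics instead of statistics computed from a batch of size 1.
netG_S1.eval()
netG_S2.eval()

with torch.no_grad():  # no gradients are needed at test time
    val_video   = val_gt.unsqueeze(2).repeat(1, 1, 32, 1, 1)
    val_fake_s1 = netG_S1(val_video)    # stage-1 prediction
    val_fake_s2 = netG_S2(val_fake_s1)  # stage-2 refinement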