weixiong-ur / mdgan

Official code of the CVPR'18 paper "Learning to Generate Time-Lapse Videos Using Multi-Stage Dynamic Generative Adversarial Networks"

Evaluation for Single Frames #4

Open mdorkenw opened 4 years ago

mdorkenw commented 4 years ago

Unfortunately, your inference code doesn't work for single-image evaluation, so I adapted your test.py to evaluate single frames as input (see below). Could you please check whether everything is correct? The generated videos look bad even for images from your test set.

from __future__ import print_function
import argparse
import os
import time

import torch
import torch.backends.cudnn as cudnn
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
from PIL import ImageFile
from PIL import Image

from Dynamic_Model import MDGAN_S1_G, MDGAN_S2_G

ImageFile.LOAD_TRUNCATED_IMAGES = True
cudnn.benchmark = True

parser = argparse.ArgumentParser()
parser.add_argument('--netG_S2', 
    default='./netG_S2_067.pth', 
    help='path to netG Stage 2')
parser.add_argument('--netG_S1', 
    default='./netG_S1_030.pth', 
    help='path to netG Stage 1')
parser.add_argument('--cuda', action='store_true', default=False,
    help='enables cuda')
parser.add_argument('--outf', default='./results',
    help='output folder')
parser.add_argument('--test_img', required=True, help='path to the test image')
opt = parser.parse_args()

localtime = time.asctime(time.localtime(time.time()))
print('\n Run evaluation on single frame (%s)' % localtime)

cuda = opt.cuda
test_img = opt.test_img
output_path = opt.outf
if not os.path.exists(output_path):
    os.mkdir(output_path)

imageSize = 128
netG_S1 = MDGAN_S1_G(32)
netG_S2 = MDGAN_S2_G(32)

# Load models (map the checkpoints to CPU when CUDA is not used)
map_location = None if opt.cuda else 'cpu'
netG_S1.load_state_dict(torch.load(opt.netG_S1, map_location=map_location))
netG_S2.load_state_dict(torch.load(opt.netG_S2, map_location=map_location))

## Load image and apply transformation
transf_ = transforms.Compose([
    transforms.Resize((imageSize, imageSize)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),
                         (0.5, 0.5, 0.5)),
])

val_gt = transf_(Image.open(test_img).convert('RGB')).unsqueeze(0)

if cuda:
    netG_S1.cuda()
    netG_S2.cuda()
    val_gt = val_gt.cuda()

netG_S1.train() ## WHY TRAIN not EVAL?
netG_S2.train()

val_video   = val_gt.unsqueeze(2).repeat(1, 1, 32, 1, 1)  # replicate the frame 32 times along the time axis
val_fake_s1 = netG_S1(Variable(val_video))
val_fake_s2 = netG_S2(val_fake_s1)                     # size: batchsize x 3 x 32 x H x W
val_fake_s1 = val_fake_s1.data.permute(2, 0, 1, 3, 4)  # permute to 32 x batchsize x 3 x H x W
val_fake_s2 = val_fake_s2.data.permute(2, 0, 1, 3, 4)

# save fake samples of stage 1
for t in range(val_fake_s1.size(0)):
    vutils.save_image(val_fake_s1[t],
        '%s/samples_s1_frame_%03d.png' % (opt.outf, t),
        normalize=True, nrow=8)
# save fake samples of stage 2
for t in range(val_fake_s2.size(0)):
    vutils.save_image(val_fake_s2[t],
        '%s/samples_s2_frame_%03d.png' % (opt.outf, t),
        normalize=True, nrow=8)

def generate_video(model='s1', outf= opt.outf):
    img_path = os.path.join(outf, 'samples_' + model +  '_frame_%03d.png')
    mp4_path = os.path.join(outf, model+ '_video.mp4')
    cmd = ('ffmpeg -loglevel warning -framerate 25 -i ' + img_path + 
        ' -qscale:v 2 -y ' + mp4_path )
    print(cmd)
    os.system(cmd)
generate_video('s1')
generate_video('s2')
weixiong-ur commented 4 years ago

Hi Donk10,

It may be caused by the batch norm layers, which are sensitive to the batch size. I will try to replicate the testing with a batch size of 1. In the meantime, can you use model.eval() instead of model.train() to perform the prediction?
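
For reference, a minimal sketch of that suggestion, reusing the netG_S1, netG_S2, and val_gt objects from the script above. The torch.no_grad() wrapper is an extra assumption (recent PyTorch versions) that replaces the Variable call and simply disables gradient tracking during inference:

# Put both generators in evaluation mode so that BatchNorm layers use their
# stored running statistics instead of statistics computed from a batch of size 1.
netG_S1.eval()
netG_S2.eval()

with torch.no_grad():  # no gradients are needed at test time
    val_video   = val_gt.unsqueeze(2).repeat(1, 1, 32, 1, 1)
    val_fake_s1 = netG_S1(val_video)    # stage-1 prediction
    val_fake_s2 = netG_S2(val_fake_s1)  # stage-2 refinement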