Open ziweizhao1993 opened 5 years ago
Yes, exactly. Below is part of the code that was used. Hope that helps.
Also, don't be alarmed if you start noticing unexpected numbers. I'm in the process of clarifying some problems with the numbers presented in the CharadesEgo paper; it looks like there was a mix-up between Charades_v0 and Charades_v1 for testing. See https://github.com/gsig/actor-observer/issues/7 for discussion.
import torch
from torch.autograd import Variable
import torch.nn as nn
from models.utils import load_sub_architecture
class CopyBaselineModel(nn.Module):
    """Run a student network and a teacher network side by side.

    The student is applied to the first-person input and the teacher to the
    third-person input; both raw outputs are returned so that a separate
    criterion can compare them.
    """

    def __init__(self, teacher, student):
        """Store the two sub-networks.

        Args:
            teacher: module applied to the third-person input.
            student: module applied to the first-person (positive) input.
        """
        super(CopyBaselineModel, self).__init__()
        self.teacher = teacher
        self.student = student
        # `basenet` is a second name for the very same student module.
        self.basenet = self.student

    def forward(self, x, y, z):
        """Compute student and teacher outputs for one batch.

        Args:
            x: first-person positive input, fed to the student.
            y: third-person input, fed to the teacher after being detached.
            z: first-person negative input; accepted for signature
                compatibility but not used here.

        Returns:
            A 4-tuple ``(ones, ones, teacher_out, student_out)``. The two
            leading entries are all-ones placeholder tensors with one element
            per row of the student output.
        """
        s = self.student(x)
        # Detach the teacher's input so no gradient flows back into whatever
        # produced `y`.
        t = self.teacher(y.detach())
        batch_size = s.shape[0]
        # Plain tensors: wrapping in the deprecated `Variable` is a no-op on
        # PyTorch >= 0.4, so the wrapper is dropped.
        return torch.ones(batch_size), torch.ones(batch_size), t, s
class CopyBaseline(CopyBaselineModel):
    """CopyBaselineModel whose two sub-networks are built from `args`."""

    def __init__(self, args):
        """Build teacher and student with `load_sub_architecture(args)`.

        The helper is called twice, so the teacher and the student are the
        results of two separate calls (teacher first, then student).
        """
        teacher_net = load_sub_architecture(args)
        student_net = load_sub_architecture(args)
        super(CopyBaseline, self).__init__(teacher_net, student_net)
import torch
import torch.nn as nn
from models.layers.BlockGradient import BlockGradient
def nll_loss(soft_target, logdist, reduce=True):
    """Cross entropy between a soft target and a log-distribution.

    Credit: @Hongyi_Zhang

    Args:
        soft_target: target weights; assumed to lie in [0, 1] and to be
            normalized to sum to 1. Passed through `BlockGradient` first
            (presumably so no gradient reaches the target -- confirm against
            models.layers.BlockGradient).
        logdist: a (normalized) log distribution, multiplied elementwise with
            the target.
        reduce: if true, return the mean over the remaining entries;
            otherwise return the per-entry values.

    Returns:
        ``-(soft_target * logdist)`` summed over dims 2 and 1 for a 3-D
        target, or over dim 1 otherwise; averaged when `reduce` is true.
    """
    soft_target, = BlockGradient.apply(soft_target)
    weighted = -soft_target * logdist
    if soft_target.dim() == 3:
        per_item = weighted.sum(2).sum(1)
    else:
        per_item = weighted.sum(1)
    return per_item.mean() if reduce else per_item
class CopyBaselineLoss(nn.Module):
    """Loss that pushes the student's prediction toward the teacher's.

    The teacher's logits are turned into a soft target with a softmax and
    compared with the student's log-softmax through `nll_loss`.
    """

    def __init__(self, args):
        """Create the loss.

        Args:
            args: object exposing a `clsweight` attribute. The value is stored
                on the instance but not used by the methods of this class.
        """
        super(CopyBaselineLoss, self).__init__()
        self.clsweight = args.clsweight
        # Explicit dim: the implicit-dim form is deprecated, warns on every
        # call and chooses the axis from the input rank. dim=1 is what the
        # implicit form picked for 2-D logits, so 2-D results are unchanged.
        # NOTE(review): for 3-D inputs the implicit choice was dim 0; confirm
        # dim=1 is the class axis if 3-D logits are ever passed.
        self.softmax = nn.Softmax(dim=1)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def cross_entropy(self, log_target, log_pred, reduce=True):
        """Cross entropy between two sets of unnormalized scores.

        Args:
            log_target: target logits; softmax-normalized before use.
            log_pred: predicted logits; log-softmax-normalized before use.
            reduce: forwarded to `nll_loss`.

        Returns:
            Whatever `nll_loss` returns for the normalized pair.
        """
        soft_target = self.softmax(log_target)
        log_dist = self.logsoftmax(log_pred)
        return nll_loss(soft_target, log_dist, reduce)

    def forward(self, dummy1, dummy2, teacher, student, target, ids):
        """Compute the copy loss for one batch.

        Args:
            dummy1: ignored.
            dummy2: ignored.
            teacher: teacher logits, used as the soft target.
            student: student logits, used as the prediction.
            target: only its first dimension is read, to size the second
                return value.
            ids: ignored.

        Returns:
            ``(loss, ones)`` where `loss` is the reduced cross entropy and
            `ones` is an all-ones tensor with ``target.shape[0]`` elements.
        """
        loss = self.cross_entropy(teacher, student)
        return loss, torch.ones(target.shape[0])
Hi Gunnar,
I was just wondering what "ResNet-152 Transfer" stands for. The paper says: "It uses the Charades model to predict the activities in the third person video, and then uses those labels as supervision for the first-person video."
Does it mean you run the Charades model on CharadesEgo_only3rd videos, then warp the labels to their corresponding 1st person video, and use the labels as supervision to finetune the original Charades model?
Thank you.
Best, Ziwei