gsig / actor-observer

ActorObserverNet code in PyTorch from "Actor and Observer: Joint Modeling of First and Third-Person Videos", CVPR 2018
GNU General Public License v3.0
76 stars 9 forks source link

What does ResNet-152 Transfer mean? #6

Open ziweizhao1993 opened 5 years ago

ziweizhao1993 commented 5 years ago

Hi Gunnar,

I was just wondering what "ResNet-152 Transfer" stands for. The paper says: "It uses the Charades model to predict the activities in the third person video, and then uses those labels as supervision for the first-person video."

Does it mean you run the Charades model on the CharadesEgo_only3rd videos, then transfer the predicted labels to the corresponding first-person videos, and use those labels as supervision to fine-tune the original Charades model?

Thank you.

Best, Ziwei

gsig commented 5 years ago

Yes, exactly. I'm attaching part of the code that was used below. Hope that helps.

Also, if you start noticing unexpected numbers, don't be alarmed. I'm in the process of clarifying some problems with the numbers presented in the CharadesEgo paper; it looks like there was a mix-up between using Charades_v0 and Charades_v1 for testing. See https://github.com/gsig/actor-observer/issues/7 for discussion.

import torch
from torch.autograd import Variable
import torch.nn as nn
from models.utils import load_sub_architecture

class CopyBaselineModel(nn.Module):
    """Wrap a teacher network and a student network in one module.

    The forward pass feeds the first-person clip to the student and the
    third-person clip to the teacher, and returns both outputs together
    with two placeholder tensors of ones.
    """

    def __init__(self, teacher, student):
        super(CopyBaselineModel, self).__init__()
        self.teacher, self.student = teacher, student
        # The student is additionally exposed under the name ``basenet``.
        self.basenet = student

    def forward(self, x, y, z):
        """Run the student on ``x`` and the teacher on ``y``.

        Assumed inputs:
            x: first person positive
            y: third person
            z: first person negative (not used by this model)

        Returns:
            A 4-tuple ``(ones, ones, teacher_out, student_out)``. Each
            ``ones`` is a 1-D tensor of ones whose length equals the first
            dimension of the student output.
        """
        student_out = self.student(x)
        # The third-person input is detached before it enters the teacher.
        teacher_out = self.teacher(y.detach())
        batch = student_out.shape[0]
        placeholder_a = Variable(torch.ones(batch))
        placeholder_b = Variable(torch.ones(batch))
        return placeholder_a, placeholder_b, teacher_out, student_out

class CopyBaseline(CopyBaselineModel):
    """CopyBaselineModel whose teacher and student are both built from ``args``."""

    def __init__(self, args):
        # load_sub_architecture is called once per network, teacher first.
        teacher_net = load_sub_architecture(args)
        student_net = load_sub_architecture(args)
        super(CopyBaseline, self).__init__(teacher=teacher_net, student=student_net)
import torch
import torch.nn as nn
from models.layers.BlockGradient import BlockGradient

def nll_loss(soft_target, logdist, reduce=True):
    """Negative log-likelihood of ``logdist`` under a soft target.

    @Hongyi_Zhang

    Args:
        soft_target: target weights; assumed to be normalized to 1 and to
            lie in [0, 1]. Passed through ``BlockGradient`` before use.
        logdist: a (normalized) log distribution, multiplied element-wise
            with ``soft_target``.
        reduce: if true, return the mean over the per-sample losses;
            otherwise return the per-sample losses.

    Returns:
        ``-soft_target * logdist`` summed over every dimension except the
        first (dimensions 2 and 1 for 3-D targets, dimension 1 otherwise),
        then averaged when ``reduce`` is true.
    """
    soft_target, = BlockGradient.apply(soft_target)
    weighted = -soft_target * logdist
    if soft_target.dim() == 3:
        per_sample = weighted.sum(2).sum(1)
    else:
        per_sample = weighted.sum(1)
    return per_sample.mean() if reduce else per_sample

class CopyBaselineLoss(nn.Module):
    """Soft-label cross-entropy between teacher and student outputs.

    The teacher output is turned into a probability distribution with a
    softmax and used as the soft target for the student's log-softmax.
    """

    def __init__(self, args):
        super(CopyBaselineLoss, self).__init__()
        self.clsweight = args.clsweight
        # Pin the normalization axis explicitly. Leaving ``dim`` unset is
        # deprecated, emits a warning on every call, and makes the axis
        # depend on the input rank (axis 0 for 3-D input). ``dim=1`` is what
        # the implicit rule already selects for 2-D inputs.
        # NOTE(review): assumes inputs are laid out (batch, classes) - confirm.
        self.softmax = nn.Softmax(dim=1)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def cross_entropy(self, log_target, log_pred, reduce=True):
        """Cross-entropy of ``log_pred`` against the softmax of ``log_target``.

        Args:
            log_target: unnormalized target scores; softmax is applied.
            log_pred: unnormalized predicted scores; log-softmax is applied.
            reduce: forwarded to ``nll_loss``.

        Returns:
            Whatever ``nll_loss`` returns for the two normalized inputs.
        """
        return nll_loss(self.softmax(log_target), self.logsoftmax(log_pred), reduce)

    def forward(self, dummy1, dummy2, teacher, student, target, ids):
        """Compute the teacher-to-student loss.

        Args:
            dummy1, dummy2: ignored.
            teacher: teacher network output, used as the soft target.
            student: student network output, used as the prediction.
            target: only its first dimension is read, to size the second
                return value.
            ids: ignored.

        Returns:
            ``(loss, ones)`` where ``loss`` is the reduced cross-entropy and
            ``ones`` is a 1-D tensor of ones of length ``target.shape[0]``.
        """
        loss = self.cross_entropy(teacher, student)
        return loss, torch.ones(target.shape[0])