waleedka / hiddenlayer

Neural network graphs and training metrics for PyTorch, Tensorflow, and Keras.
MIT License

Pointer Network - 2 Inputs issue #20

Open LSOFT123 opened 5 years ago

LSOFT123 commented 5 years ago

When calling:

hl_graph = hl.build_graph(pointer, (inputs, target))

with inputs = tensor([[3, 7, 2, 9, 0, 1, 8, 5, 4, 6]]) and target = tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]), I get the following error:

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in _slow_forward(self, *input, **kwargs)
    463         tracing_state._traced_module_stack.append(self)
    464         try:
--> 465             result = self.forward(*input, **kwargs)
    466         finally:
    467             tracing_state.pop_scope()

<ipython-input-217-844f2c5b2d6a> in forward(self, inputs, target)
     80 
     81 
---> 82         return loss / seq_len

RuntimeError: Expected object of type torch.FloatTensor but found type torch.LongTensor for argument #2 'other'

And the PointerNet model is:


import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Attention and USE_CUDA are defined elsewhere in the notebook.
class PointerNet(nn.Module):
    def __init__(self, 
            embedding_size,
            hidden_size,
            seq_len,
            n_glimpses,
            tanh_exploration,
            use_tanh,
            use_cuda=USE_CUDA):
        super(PointerNet, self).__init__()

        self.embedding_size = embedding_size
        self.hidden_size    = hidden_size
        self.n_glimpses     = n_glimpses
        self.seq_len        = seq_len
        self.use_cuda       = use_cuda

        self.embedding = nn.Embedding(seq_len, embedding_size)
        self.encoder = nn.LSTM(embedding_size, hidden_size, batch_first=True)
        self.decoder = nn.LSTM(embedding_size, hidden_size, batch_first=True)
        self.pointer = Attention(hidden_size, use_tanh=use_tanh, C=tanh_exploration, use_cuda=use_cuda)
        self.glimpse = Attention(hidden_size, use_tanh=False, use_cuda=use_cuda)

        self.decoder_start_input = nn.Parameter(torch.FloatTensor(embedding_size))
        self.decoder_start_input.data.uniform_(-(1. / math.sqrt(embedding_size)), 1. / math.sqrt(embedding_size))

        self.criterion = nn.CrossEntropyLoss()
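        # CrossEntropyLoss expects float logits and Long (int64) class-index targets.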

    # Set the logits of already-selected positions to -inf so they cannot be chosen again.
    def apply_mask_to_logits(self, logits, mask, idxs):
        batch_size = logits.size(0)
        clone_mask = mask.clone()

        if idxs is not None:
            clone_mask[[i for i in range(batch_size)], idxs.data] = 1
            logits[clone_mask] = -np.inf
        return logits, clone_mask

    def forward(self, inputs, target):
        """
        Args:
            inputs: [batch_size x sourceL] Long tensor of input indices
            target: [batch_size x sourceL] Long tensor with the target ordering (used for teacher forcing and the loss)
        """
        batch_size = inputs.size(0)
        seq_len    = inputs.size(1)
        assert seq_len == self.seq_len

        embedded = self.embedding(inputs)
        target_embedded = self.embedding(target)
        encoder_outputs, (hidden, context) = self.encoder(embedded)

        mask = torch.zeros(batch_size, seq_len).byte()
        if self.use_cuda:
            mask = mask.cuda()

        idxs = None

        decoder_input = self.decoder_start_input.unsqueeze(0).repeat(batch_size, 1)

        loss = 0

        # Decode for seq_len steps with teacher forcing, accumulating the loss.
        for i in range(seq_len):

            _, (hidden, context) = self.decoder(decoder_input.unsqueeze(1), (hidden, context))

            query = hidden.squeeze(0)
            # Use a separate loop variable so the outer step index `i` is not clobbered.
            for _ in range(self.n_glimpses):
                ref, logits = self.glimpse(query, encoder_outputs)
                logits, mask = self.apply_mask_to_logits(logits, mask, idxs)
                query = torch.bmm(ref, F.softmax(logits, dim=1).unsqueeze(2)).squeeze(2)

            _, logits = self.pointer(query, encoder_outputs)
            logits, mask = self.apply_mask_to_logits(logits, mask, idxs)

            decoder_input = target_embedded[:,i,:]

            loss += self.criterion(logits, target[:,i])

        return loss / seq_len
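
For reference, the model above is instantiated and handed to hiddenlayer roughly like this (a sketch only; the real hyperparameter values, the Attention module, and USE_CUDA come from elsewhere in the notebook):

import torch
import hiddenlayer as hl

# Hypothetical hyperparameters, for illustration only.
pointer = PointerNet(embedding_size=32, hidden_size=32, seq_len=10,
                     n_glimpses=1, tanh_exploration=10, use_tanh=True, use_cuda=False)

inputs = torch.tensor([[3, 7, 2, 9, 0, 1, 8, 5, 4, 6]])   # torch.int64
target = torch.tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])   # torch.int64

hl_graph = hl.build_graph(pointer, (inputs, target))       # raises the RuntimeError above
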
waleedka commented 5 years ago

Are you sure you're feeding tensors of the correct data type? It seems to be complaining that it's receiving Long when it's expecting a float.
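
A quick way to narrow this down (a minimal check, reusing the pointer, inputs, and target variables from the issue) is to compare an eager forward pass with the traced one:

print(inputs.dtype, target.dtype)   # torch.int64 is expected: nn.Embedding and CrossEntropyLoss take Long indices

loss = pointer(inputs, target)      # plain eager call, no tracing involved
print(loss.dtype)                   # CrossEntropyLoss normally returns a float tensor (torch.float32)

If the eager call works and returns a float loss but hl.build_graph still fails, the Long/Float mismatch is only being introduced while the module is traced.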