Element-Research / rnn

Recurrent Neural Network library for Torch7's nn
BSD 3-Clause "New" or "Revised" License

Can't forward input in decoder after training #390

Closed GHamrouni closed 7 years ago

GHamrouni commented 7 years ago

Hi,

I was modifying the coupled encoder and decoder example in order to perform sequence sampling. I noticed that, before training, the decoder can forward the GO symbol without any error: dec:forward(torch.Tensor{1}). But after the training phase, dec:forward(torch.Tensor{1}) crashes systematically. Do you have any suggestions?

Here is the code:

--[[
Example of "coupled" separate encoder and decoder networks, e.g. for sequence-to-sequence networks.
]]--

require 'rnn'

version = 1.4 -- Uses [get,set]GradHiddenState for LSTM

opt = {}
opt.learningRate = 0.1
opt.hiddenSize = 40
opt.vector_dim = 20
opt.numLayers = 1
opt.vocabSize = 100
opt.seqLen = 6 -- length of the encoded sequence (with padding)
opt.niter = 5
opt.max_out = 8

--[[ Forward coupling: Copy encoder cell and output to decoder LSTM ]]--
function forwardConnect(enc, dec)
   for i=1,#enc.lstmLayers do
         dec.lstmLayers[i].userPrevOutput = nn.rnn.recursiveCopy(dec.lstmLayers[i].userPrevOutput, enc.lstmLayers[i].outputs[opt.seqLen])
         dec.lstmLayers[i].userPrevCell = nn.rnn.recursiveCopy(dec.lstmLayers[i].userPrevCell, enc.lstmLayers[i].cells[opt.seqLen])
   end
end

--[[ Backward coupling: Copy decoder gradients to encoder LSTM ]]--
function backwardConnect(enc, dec)
   for i=1,#enc.lstmLayers do
         enc:setGradHiddenState(opt.seqLen, dec:getGradHiddenState(0))
   end
end

lstmLayersEnc = {}
lstmLayersEnc[1] = nn.FastLSTM(opt.vector_dim, opt.hiddenSize):maskZero(1)
enc = nn.MaskZero(nn.Sequential()
                :add(nn.LookupTableMaskZero(opt.vocabSize, opt.vector_dim))
                :add(nn.SplitTable(1, 2))
                :add(nn.Sequencer(lstmLayersEnc[1]))
                :add(nn.SelectTable(-1)), 1)
enc:zeroGradParameters()

enc.lstmLayers = lstmLayersEnc

lstmLayers = {}
lstmLayers[1] = nn.FastLSTM(opt.vector_dim, opt.hiddenSize):maskZero(1)

dec = nn.MaskZero(nn.Sequential()
                    :add(nn.LookupTableMaskZero(opt.vocabSize, opt.vector_dim))
                    :add(nn.SplitTable(1, 2))
                    :add(nn.Sequencer(lstmLayers[1]))
                    :add(nn.Sequencer(nn.Linear(opt.hiddenSize, opt.vocabSize)))
                    :add(nn.Sequencer(nn.LogSoftMax())), 1)

dec.lstmLayers = lstmLayers

local criterion = nn.SequencerCriterion(nn.MaskZeroCriterion(nn.ClassNLLCriterion(),1))
-- Some example data (batchsize = 2) with variable length input and output sequences

-- The input sentences to the encoder, padded with zeros from the left
local encInSeq  = torch.Tensor({{0,0,0,1,2,3},{0,0,4,3,2,1}})
local decInSeq  = torch.Tensor({{6,1,2,3,4,0},{6,5,4,3,2,1}})
local decOutSeq = torch.Tensor({{1,2,3,4,1,1},{5,4,3,2,1,1}})

-- We can forward the GO symbol without errors:
print(dec:forward(torch.Tensor{1}))

for i=1,opt.niter do
   enc:zeroGradParameters()
   dec:zeroGradParameters()

   -- Forward pass
   local encOut = enc:forward(encInSeq)
   forwardConnect(enc, dec)
   local decOut = dec:forward(decInSeq)
   decoderOutSplit = nn.SplitTable(1, 1):forward(decOutSeq)

   local err = criterion:forward(decOut, decoderOutSplit)

   print(string.format("Iteration %d ; NLL err = %f ", i, err))

   -- Backward pass
   local gradOutput = criterion:backward(decOut, decoderOutSplit)
   dec:backward(decInSeq, gradOutput)
   backwardConnect(enc, dec)
   local zeroTensor = torch.Tensor(encOut):zero()
   enc:backward(encInSeq, zeroTensor)

   dec:updateParameters(opt.learningRate)
   enc:updateParameters(opt.learningRate)
end

-- The program crashes here:
print(dec:forward(torch.Tensor{1}))
GHamrouni commented 7 years ago

The encoder/decoder expects a mini-batch as input. To correct the behavior, one must forward a tensor of size 2x1, e.g. torch.Tensor{{GO},{GO}}.
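
For example, a minimal sketch of the fix (assuming the GO symbol index is 6, as in decInSeq above, and keeping the batch size of 2 used during training):

-- Forward the GO symbol as a 2x1 mini-batch (batchsize x seqlen),
-- matching the batched input the decoder was trained on.
local GO = 6  -- assumed GO index, taken from the first token of decInSeq
local goBatch = torch.Tensor{{GO}, {GO}}
local decOut = dec:forward(goBatch)  -- no longer crashes after training
print(decOut)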