tensorflow / nmt

TensorFlow Neural Machine Translation Tutorial

GreedyEmbeddingHelper during training #310

Open MethaniNitesh opened 6 years ago

MethaniNitesh commented 6 years ago

Hi,

I want to run TrainingHelper for a few epochs at the start of training and then switch to GreedyEmbeddingHelper for the remaining epochs.

I tried replacing the TrainingHelper code with GreedyEmbeddingHelper, but it isn't working. Any suggestions?

I simply replaced this part:

helper = tf.contrib.seq2seq.TrainingHelper(
    decoder_emb_inp, iterator.target_sequence_length,
    time_major=self.time_major)

with this part:

helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    self.embedding_decoder, start_tokens, end_token)

in the _build_decoder() function of model.py.
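
For reference, GreedyEmbeddingHelper feeds the decoder's own argmax prediction back in at each step instead of the ground-truth target, so it needs inputs that TrainingHelper does not: a batch of start tokens, an end-token id to stop on, and a cap on the decode length. A minimal sketch of those extra pieces (tgt_sos_id and tgt_eos_id stand in for however the <s>/</s> ids are looked up in model.py):

start_tokens = tf.fill([self.batch_size], tgt_sos_id)  # one <s> id per batch entry
end_token = tgt_eos_id  # scalar id; decoding stops once every sequence emits it

helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
    self.embedding_decoder, start_tokens, end_token)

# there is no fixed target length here, so tf.contrib.seq2seq.dynamic_decode
# also needs maximum_iterations=..., otherwise it only stops on end_token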

jannikmi commented 6 years ago

I tried to implement this with:

start_tokens = tf.fill([self.batch_size], tgt_sos_id)
end_token = tgt_eos_id

def training_decoding():
    # always pass the true target tokens as input to the next step
    # -> model learns to predict next correct word.
    # ('start with training wheels')
    training_helper = tf.contrib.seq2seq.TrainingHelper(
        decoder_emb_inp, iterator.target_sequence_length,
        time_major=self.time_major)

    # Decoder
    nongreedy_decoder = tf.contrib.seq2seq.BasicDecoder(
        cell,
        training_helper,
        decoder_initial_state)

    outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
        nongreedy_decoder,
        # get_greedy_decoder(),
        # maximum_iterations is only needed for the greedy branch,
        # where the decode length is not fixed by the targets:
        # maximum_iterations=maximum_iterations,
        output_time_major=self.time_major,
        swap_memory=True,
        scope=decoder_scope)

    sample_id = outputs.sample_id

    # Note: there's a subtle difference here between train and inference.
    # We could have set output_layer when creating the decoder
    #   and shared more code between train and inference.
    # We chose to apply the output_layer to all timesteps for speed:
    #   10% improvement for small models & 20% for larger ones.
    # If memory is a concern, apply output_layer per timestep instead.
    logits = self.output_layer(outputs.rnn_output)
    return sample_id, logits, final_context_state

def greedy_decoding():
    # after a few training epochs, feed the model's own predictions back in
    # -> it learns to predict correct sequences on its own
    # ('without training wheels')
    greedy_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        self.embedding_decoder, start_tokens, end_token)

    greedy_decoder = tf.contrib.seq2seq.BasicDecoder(
        cell,
        greedy_helper,
        decoder_initial_state,
        output_layer=self.output_layer  # applied per timestep
    )

    outputs, final_context_state, _ = tf.contrib.seq2seq.dynamic_decode(
        greedy_decoder,
        # required here: greedy decoding has no fixed target length
        maximum_iterations=maximum_iterations,
        output_time_major=self.time_major,
        swap_memory=True,
        scope=decoder_scope)

    sample_id = outputs.sample_id
    logits = outputs.rnn_output
    return sample_id, logits, final_context_state

# TODO: switch to greedy decoding after the first N training steps
# FIXME: AttributeError: 'Model' object has no attribute 'global_step'
# both branches return a 3-tuple, so unpack all three values
sample_id, logits, final_context_state = tf.cond(
    self.global_step < STEPS_WITH_TRAINING_WHEELS,
    training_decoding, greedy_decoding)

But that gets me the error: AttributeError: 'Model' object has no attribute 'global_step'. That is very unexpected, because the attribute should be inherited from the BaseModel class in any case.
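
My guess is an ordering issue: if I read BaseModel.__init__ correctly, self.global_step is only assigned after build_graph() (and therefore _build_decoder()) has already run, so the attribute does not exist yet at this point. A sketch of a possible workaround under that assumption (STEPS_WITH_TRAINING_WHEELS is my own constant):

# graph-level step, usable before BaseModel assigns the attribute
global_step = tf.train.get_or_create_global_step()

sample_id, logits, final_context_state = tf.cond(
    global_step < STEPS_WITH_TRAINING_WHEELS,
    training_decoding, greedy_decoding)

# caveat: nmt increments its own step variable in the optimizer, so the
# optimizer would have to be pointed at this same variable, e.g.
# opt.apply_gradients(grads_and_vars, global_step=global_step)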

This is basically just the code from the inference part of _build_decoder(), but even with tf.cond(tf.constant(False), ...) I get a shape mismatch in the loss calculation operation.
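
I suspect the shape mismatch comes from the time dimension: TrainingHelper always runs exactly target_sequence_length steps, while GreedyEmbeddingHelper stops once every sequence has emitted end_token (or at maximum_iterations), so the greedy logits generally have a different max_time than the targets. A sketch of padding/truncating them to the target length before the loss, assuming batch-major tensors (time_major=False):

cur_time = tf.shape(logits)[1]                  # decoded max_time
tgt_time = tf.shape(iterator.target_output)[1]  # target max_time

# zero-pad if decoding stopped early, then cut to the target length
logits = tf.pad(logits,
                [[0, 0], [0, tf.maximum(tgt_time - cur_time, 0)], [0, 0]])
logits = logits[:, :tgt_time, :]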

Grateful for any hints.