Closed NamTran838P closed 4 years ago
Can you please share the config.py file?
I override the config.py parameters inside my training script. Please find the config-modifying snippet below:
params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['source_text']
params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['target_text']
params['USE_CUDNN'] = True
params['EARLY_STOP'] = True
params['PATIENCE'] = 10
params['SAVE_EACH_EVALUATION'] = True
params['STORE_PATH'] = PATH + "model/"
params['ATTENTION_MODE'] = "add"
params['N_LAYERS_ENCODER'] = 2
params['N_LAYERS_DECODER'] = 2
params['SOURCE_TEXT_EMBEDDING_SIZE'] = 64
params['TARGET_TEXT_EMBEDDING_SIZE'] = 64
params['SKIP_VECTORS_HIDDEN_SIZE'] = 64
params['ATTENTION_SIZE'] = 64
params['ENCODER_HIDDEN_SIZE'] = 64
params['DECODER_HIDDEN_SIZE'] = 64
params['ENCODER_RNN_TYPE'] = "GRU"
params['DECODER_RNN_TYPE'] = "ConditionalGRU"
params['MODEL_SIZE'] = 64
params['METRICS'] = ['sacrebleu']
params['STOP_METRIC'] = 'sacrebleu'
params['DETOKENIZATION_METHOD'] = 'detokenize_basic'
params['APPLY_DETOKENIZATION'] = True
params['LENGTH_PENALTY'] = True
params['LENGTH_NORM_FACTOR'] = 1.0
"""Now, we create a `TranslationModel` instance:"""
#import tensorflow as tf
nmt_model = TranslationModel(params,
model_type='Transformer',
model_name='transformer_model',
vocabularies=dataset.vocabulary,
store_path=params['STORE_PATH'],
verbose=True)
#
"""Next, we must define the inputs and outputs mapping from our Dataset instance to our model:"""
inputMapping = dict()
for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
pos_source = dataset.ids_inputs.index(id_in)
id_dest = nmt_model.ids_inputs[i]
inputMapping[id_dest] = pos_source
nmt_model.setInputsMapping(inputMapping)
outputMapping = dict()
for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']):
pos_target = dataset.ids_outputs.index(id_out)
id_dest = nmt_model.ids_outputs[i]
outputMapping[id_dest] = pos_target
nmt_model.setOutputsMapping(outputMapping)
"""We can add some callbacks for controlling the training (e.g. Sampling each N updates, early stop, learning rate annealing...). For instance, let's build a sampling callback. After each epoch, it will compute the BLEU scores on the development set using the sacreBLEU package. We need to pass some configuration variables to the callback (in the extra_vars dictionary):"""
search_params = {
'language': 'en',
'tokenize_f': eval('dataset.' + 'tokenize_basic'),
'beam_size': 1,
'optimized_search': True,
'n_gpus' : 2,
'model_inputs': params['INPUTS_IDS_MODEL'],
'model_outputs': params['OUTPUTS_IDS_MODEL'],
'dataset_inputs': params['INPUTS_IDS_DATASET'],
'dataset_outputs': params['OUTPUTS_IDS_DATASET'],
'n_parallel_loaders': 1,
'maxlen': 100,
'model_inputs': ['source_text', 'state_below'],
'model_outputs': ['target_text'],
'dataset_inputs': ['source_text', 'state_below'],
'dataset_outputs': ['target_text'],
'normalize': True,
'pos_unk': True,
'heuristic': 0,
'state_below_maxlen': 1,
'val': {'references': dataset.extra_variables['val']['target_text']}
}
vocab = dataset.vocabulary['target_text']['idx2words']
callbacks = []
input_text_id = params['INPUTS_IDS_DATASET'][0]
callbacks.append(PrintPerformanceMetricOnEpochEndOrEachNUpdates(nmt_model,
dataset,
gt_id='target_text',
metric_name=['sacrebleu'],
set_name=['val'],
batch_size=256,
each_n_epochs=2,
extra_vars=search_params,
reload_epoch=0,
is_text=True,
input_text_id=input_text_id,
index2word_y=vocab,
sampling_type='max_likelihood',
beam_search=True,
save_path=nmt_model.model_path,
start_eval_on_epoch=0,
write_samples=True,
write_type='list',
verbose=True))
"""Now we are ready to train. Let's set up some training parameters..."""
#params['TENSORBOARD'] = True
#params['LOG_DIR'] = 'tensorboard_logs'
#params['EMBEDDING_FREQ'] = 1
#params['EMBEDDING_LAYER_NAMES'] = ["source_word_embedding", "target_word_embedding"]
#params['LABEL_WORD_EMBEDDINGS_WITH_VOCAB'] = True
#params['WORD_EMBEDDINGS_LABELS'] = ['source_text', 'target_text']
training_params = {'n_epochs': 500,
'batch_size': 256,
'maxlen': 50,
'epochs_for_save': 1,
'verbose': 1,
'eval_on_sets': [],
'n_parallel_loaders': 1,
'extra_callbacks': callbacks,
'reload_epoch': 0,
'epoch_offset': 0,
'n_gpus': 2}
#'tensorboard': True,
#'tensorboard_params': {'log_dir': 'tensorboard_logs', 'embeddings_freq': 1,
#'embeddings_layer_names': [],
#'histogram_freq': 1, 'batch_size': 100, 'write_graph': True, 'write_grads': True, 'write_images': True}
"""And train!"""
nmt_model.trainNet(dataset, training_params)
When using a Transformer model, you should set the option attend_on_output=True (it defaults to False) in the search params, as in:
https://github.com/lvapeab/nmt-keras/blob/1a5b93c348a5391e2f02f0c2eb0e4b18fa4f8698/nmt_keras/build_callbacks.py#L57
I tried what you suggested by adding:
params['MODEL_TYPE'] = "Transformer"
params['ATTEND_ON_OUTPUT'] = True
search_params = {
'language': 'en',
'tokenize_f': eval('dataset.' + 'tokenize_basic'),
'beam_size': 1,
'optimized_search': True,
'n_gpus' : 2,
'attend_on_output': True,
'model_inputs': params['INPUTS_IDS_MODEL'],
'model_outputs': params['OUTPUTS_IDS_MODEL'],
'dataset_inputs': params['INPUTS_IDS_DATASET'],
'dataset_outputs': params['OUTPUTS_IDS_DATASET'],
'n_parallel_loaders': 1,
'maxlen': 100,
'model_inputs': ['source_text', 'state_below'],
'model_outputs': ['target_text'],
'dataset_inputs': ['source_text', 'state_below'],
'dataset_outputs': ['target_text'],
'normalize': True,
'pos_unk': True,
'heuristic': 0,
'state_below_maxlen': 1,
'val': {'references': dataset.extra_variables['val']['target_text']}
}
Unfortunately, the same issue is still there during validation.
Getting same error... Please help!
Oh, the problem is that pos_unk is not implemented for the Transformer model.
It should work if you set 'pos_unk': False in the search_params.
I've seen that this is not taken into account in the colab notebooks, which have these options hardcoded (in the library it is properly managed). I've also noticed that 'state_below_maxlen' should be -1.
I'll update the notebooks.
Hello, thanks for the response. Training with evaluation on validation data now works fine. However, when I try to evaluate on a test dataset and decode the beam search predictions into words, it breaks again. Error:
[08/04/2020 11:08:03] Decoding beam search prediction ...
Traceback (most recent call last):
File "eval.py", line 73, in <module>
verbose=params['VERBOSE'])
File "/data/home/pcori/Chatbot/chat-env/lib/python3.6/site-packages/keras_wrapper/utils.py", line 1116, in decode_predictions_beam_search
preds]
File "/data/home/pcori/Chatbot/chat-env/lib/python3.6/site-packages/keras_wrapper/utils.py", line 1115, in <listcomp>
flattened_predictions = [list(map(lambda x: index2word[x], pred)) for pred in
TypeError: 'numpy.int64' object is not iterable
Code:
dataset = loadDataset(os.path.join(DATA_PATH, "dataset/Dataset_tutorial_dataset.pkl"))
# Load model
nmt_model = loadModel(MODEL_PATH, 270)
params = load_parameters()
params_prediction = {
'language': 'en',
'tokenize_f': eval('dataset.' + 'tokenize_basic'),
'beam_size': 5,
'optimized_search': True,
'model_inputs': params['INPUTS_IDS_MODEL'],
'model_outputs': params['OUTPUTS_IDS_MODEL'],
'dataset_inputs': params['INPUTS_IDS_DATASET'],
'dataset_outputs': params['OUTPUTS_IDS_DATASET'],
'n_parallel_loaders': 1,
'maxlen': 50,
'model_inputs': ['source_text', 'state_below'],
'model_outputs': ['target_text'],
'dataset_inputs': ['source_text', 'state_below'],
'dataset_outputs': ['target_text'],
'normalize': True,
'pos_unk': False,
'heuristic': 0,
'state_below_maxlen': -1,
'predict_on_sets': ['test'],
'verbose': 0,
}
dataset.setInput(os.path.join(DATA_PATH, 'test_x.txt'),
'test',
type='text',
id='source_text',
pad_on_batch=True,
tokenization='tokenize_basic',
fill='end',
max_text_len=30,
min_occ=0,
overwrite_split=True)
dataset.setInput(None,
'test',
type='ghost',
id='state_below',
required=False,
overwrite_split=True)
dataset.setRawInput(os.path.join(DATA_PATH, 'test_x.txt'),
'test',
type='file-name',
id='raw_source_text',
overwrite_split=True)
vocab = dataset.vocabulary['target_text']['idx2words']
predictions = nmt_model.predictBeamSearchNet(dataset, params_prediction)['test']
predictions = decode_predictions_beam_search(predictions[0], # The first element of predictions contain the word indices.
vocab,
verbose=params['VERBOSE'])
It breaks on the last line, when calling decode_predictions_beam_search.
I got the same error too. I am now able to train and perform validation during training. However, when I load the model and perform predictions, it gives the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/WAVE/users/unix/nvtran/grammar_check_transformer/grammar_check.py", line 82, in check_grammar
verbose=self.params['VERBOSE'])
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/utils.py", line 1116, in decode_predictions_beam_search
preds]
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/utils.py", line 1115, in <listcomp>
flattened_predictions = [list(map(lambda x: index2word[x], pred)) for pred in
TypeError: 'numpy.int64' object is not iterable
It is again a special-case problem with pos_unk. In the toolkit it is properly handled (I recommend using it), while in the notebook it wasn't. I have updated the notebook. Basically, you'll need to do something like:
if params_prediction['pos_unk']:
samples = predictions[0] # The first element of predictions contain the word indices.
alphas = predictions[1]
else:
samples = predictions
heuristic = None
sources = None
predictions = decode_predictions_beam_search(samples, vocab)
Thanks, it finally works now.
Hello,
I am getting an error when training with a Transformer model. The problem happens when performing validation. It should be noted that using the exact same configuration for the standard seq2seq with attention model works without any issues. I am trying to get the model to perform grammar corrections on texts with grammatical errors. Thanks for your help.
An excerpt of the terminal output is shown below:
I0405 16:41:01.451164 139661748033344 cnn_model.py:135] <<< Model saved >>>
I0405 16:41:01.451448 139661748033344 callbacks.py:288] Evaluating only every 2 epochs
Epoch 2/500
5722/5722 [==============================] - 12473s 2s/step - loss: 4.3384
I0405 20:08:54.252171 139661748033344 cnn_model.py:83] <<< Saving model to model //epoch_2 ... >>>
I0405 20:08:56.893052 139661748033344 cnn_model.py:135] <<< Model saved >>>
I0405 20:08:56.893666 139661748033344 cnn_model.py:1598] <<< Predicting outputs of val set >>>
Traceback (most recent call last):
File "train_model.py", line 264, in <module>
nmt_model.trainNet(dataset, training_params)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/cnn_model.py", line 923, in trainNet
self.train(ds, params)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/cnn_model.py", line 1152, in train
initial_epoch=params['epoch_offset'])
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras/engine/training.py", line 1709, in fit_generator
initial_epoch=initial_epoch)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras/engine/training_generator.py", line 255, in fit_generator
callbacks.on_epoch_end(epoch, epoch_logs)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras/callbacks.py", line 152, in on_epoch_end
callback.on_epoch_end(epoch, logs)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/extra/callbacks.py", line 290, in on_epoch_end
self.evaluate(epoch, counter_name='epoch')
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/extra/callbacks.py", line 342, in evaluate
predictions_all = self.model_to_eval.predictBeamSearchNet(self.ds, params_prediction)[s]
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/cnn_model.py", line 1750, in predictBeamSearchNet
return_alphas=params['coverage_penalty'])
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/search.py", line 91, in beam_search
[probs, prev_out] = model.predict_cond_optimized(X, state_below, params, ii, prev_out)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/cnn_model.py", line 1471, in predict_cond_optimized
if prev_out[idx].shape[0] == 1:
IndexError: list index out of range