I am trying to train a seq2seq-attention model using 2 GPUs and would like to use Tensorboard for visualization. I followed the nmt-keras tutorial for Tensorboard but still got an error. Here is the code that I used:
def start_training(use_gpu):
ds = Dataset('tutorial_dataset', 'tutorial', silence=False)
PATH = ""
ds.setOutput(PATH + "train_correct.txt",
'train',
type='text',
id='target_text',
tokenization='tokenize_basic',
build_vocabulary=True,
pad_on_batch=True,
sample_weights=True,
max_text_len=100,
max_words=50000,
min_occ=1)
ds.setOutput(PATH + "validation_correct.txt",
'val',
type='text',
id='target_text',
pad_on_batch=True,
tokenization='tokenize_basic',
sample_weights=True,
max_text_len=100,
max_words=0)
ds.setInput(PATH + "train_error.txt",
'train',
type='text',
id='source_text',
pad_on_batch=True,
tokenization='tokenize_basic',
build_vocabulary=True,
fill='end',
max_text_len=100,
max_words=50000,
min_occ=1)
ds.setInput(PATH + "validation_error.txt",
'val',
type='text',
id='source_text',
pad_on_batch=True,
tokenization='tokenize_basic',
fill='end',
max_text_len=100,
min_occ=1)
"""...and for the 'state_below' data. Note that: 1) The offset flat is set to 1, which means that the text will be shifted to the right 1 position. 2) During sampling time, we won't have this input. Hence, we 'hack' the dataset model by inserting an artificial input, of type 'ghost' for the validation split."""
ds.setInput(PATH + "train_correct.txt",
'train',
type='text',
id='state_below',
required=False,
tokenization='tokenize_basic',
pad_on_batch=True,
build_vocabulary='target_text',
offset=1,
fill='end',
max_text_len=100,
max_words=50000)
ds.setInput(None,
'val',
type='ghost',
id='state_below',
required=False)
"""We can also keep the literal source words (for replacing unknown words)."""
for split, input_text_filename in zip(['train', 'val'], [PATH + "train_error.txt", PATH + "validation_error.txt"]):
ds.setRawInput(input_text_filename,
split,
type='file-name',
id='raw_source_text',
overwrite_split=True)
"""We also need to match the references with the inputs. Since we only have one reference per input sample, we set `repeat=1`."""
keep_n_captions(ds, repeat=1, n=1, set_names=['val'])
"""Finally, we can save our dataset instance for using in other experiments:"""
saveDataset(ds, PATH + "dataset")
"""## 2. Creating and training a Neural Translation Model
Now, we'll create and train a Neural Machine Translation (NMT) model. Since there is a significant number of hyperparameters, we'll use the default ones, specified in the `config.py` file. Note that almost every hardcoded parameter is automatically set from config if we run `main.py `.
We'll create an `'AttentionRNNEncoderDecoder'` (a LSTM encoder-decoder with attention mechanism). Refer to the [`model_zoo.py`](https://github.com/lvapeab/nmt-keras/blob/master/nmt_keras/model_zoo.py) file for other models (e.g. Transformer).
So first, let's import the model and the hyperparameters. We'll also load the dataset we stored in the previous section (not necessary as it is in memory, but as a demonstration):
"""
params = load_parameters()
dataset = loadDataset(PATH + "dataset/Dataset_tutorial_dataset.pkl")
"""Since the number of words in the dataset may be unknown beforehand, we must update the params information according to the dataset instance:"""
params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['source_text']
params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['target_text']
params['USE_CUDNN'] = use_gpu
params['EARLY_STOP'] = True
params['PATIENCE'] = 10
params['SAVE_EACH_EVALUATION'] = True
params['STORE_PATH'] = PATH + "model/"
params['ATTENTION_MODE'] = "add"
params['N_LAYERS_ENCODER'] = 2
params['N_LAYERS_DECODER'] = 2
params['SOURCE_TEXT_EMBEDDING_SIZE'] = 128
params['TARGET_TEXT_EMBEDDING_SIZE'] = 128
params['SKIP_VECTORS_HIDDEN_SIZE'] = 128
params['ATTENTION_SIZE'] = 128
params['ENCODER_HIDDEN_SIZE'] = 128
params['DECODER_HIDDEN_SIZE'] = 128
params['ENCODER_RNN_TYPE'] = "GRU"
params['DECODER_RNN_TYPE'] = "ConditionalGRU"
params['METRICS'] = ['sacrebleu']
params['STOP_METRIC'] = 'sacrebleu'
params['APPLY_DETOKENIZATION'] = True
params['LENGTH_PENALTY'] = True
params['LENGTH_NORM_FACTOR'] = 1.0
params['TENSORBOARD'] = True
params['LOG_DIR'] = 'tensorboard_logs'
params['EMBEDDING_FREQ'] = 1
params['WRITE_GRAPH'] = True
params['WRITE_GRADS'] = True
params['WRITE_IMAGES'] = True
params['EMBEDDING_LAYER_NAMES'] = ["source_word_embedding", "target_word_embedding"]
params['LABEL_WORD_EMBEDDINGS_WITH_VOCAB'] = True
params['WORD_EMBEDDINGS_LABELS'] = ['source_text', 'target_text']
nmt_model = TranslationModel(params,
model_type='AttentionRNNEncoderDecoder',
model_name='tutorial_model',
vocabularies=dataset.vocabulary,
store_path=params['STORE_PATH'],
verbose=True)
inputMapping = dict()
for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
pos_source = dataset.ids_inputs.index(id_in)
id_dest = nmt_model.ids_inputs[i]
inputMapping[id_dest] = pos_source
nmt_model.setInputsMapping(inputMapping)
outputMapping = dict()
for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']):
pos_target = dataset.ids_outputs.index(id_out)
id_dest = nmt_model.ids_outputs[i]
outputMapping[id_dest] = pos_target
nmt_model.setOutputsMapping(outputMapping)
"""We can add some callbacks for controlling the training (e.g. Sampling each N updates, early stop, learning rate annealing...). For instance, let's build a sampling callback. After each epoch, it will compute the BLEU scores on the development set using the sacreBLEU package. We need to pass some configuration variables to the callback (in the extra_vars dictionary):"""
search_params = {
'language': 'en',
'tokenize_f': eval('dataset.' + 'tokenize_basic'),
'beam_size': 1,
'optimized_search': True,
'n_gpus' : 2,
'model_inputs': params['INPUTS_IDS_MODEL'],
'model_outputs': params['OUTPUTS_IDS_MODEL'],
'dataset_inputs': params['INPUTS_IDS_DATASET'],
'dataset_outputs': params['OUTPUTS_IDS_DATASET'],
'n_parallel_loaders': 1,
'maxlen': 100,
'model_inputs': ['source_text', 'state_below'],
'model_outputs': ['target_text'],
'dataset_inputs': ['source_text', 'state_below'],
'dataset_outputs': ['target_text'],
'normalize': True,
'pos_unk': True,
'heuristic': 0,
'state_below_maxlen': 1,
'val': {'references': dataset.extra_variables['val']['target_text']}
}
vocab = dataset.vocabulary['target_text']['idx2words']
callbacks = []
input_text_id = params['INPUTS_IDS_DATASET'][0]
callbacks.append(PrintPerformanceMetricOnEpochEndOrEachNUpdates(nmt_model,
dataset,
gt_id='target_text',
metric_name=['sacrebleu'],
set_name=['val'],
batch_size=256,
each_n_epochs=1,
extra_vars=search_params,
reload_epoch=0,
is_text=True,
input_text_id=input_text_id,
index2word_y=vocab,
sampling_type='max_likelihood',
beam_search=True,
save_path=nmt_model.model_path,
start_eval_on_epoch=0,
write_samples=True,
write_type='list',
verbose=True))
"""Now we are ready to train. Let's set up some training parameters..."""
training_params = {'n_epochs': 500,
'batch_size': 256,
'maxlen': 50,
'epochs_for_save': 1,
'verbose': 1,
'eval_on_sets': [],
'n_parallel_loaders': 1,
'extra_callbacks': callbacks,
'reload_epoch': 0,
'epoch_offset': 0,
'n_gpus': 2,
'tensorboard': True,
'tensorboard_params': {'log_dir': 'tensorboard_logs', 'embeddings_freq': 1,
'embeddings_metadata': None, 'word_embedding_labels': ['source_text', 'target_text'],
'label_word_embeddings_with_vocab': True,
'embeddings_layer_names': ["source_word_embedding", "target_word_embedding"],
'histogram_freq': 1, 'batch_size': 100, 'write_graph': True, 'write_grads': True, 'write_images': True}}
nmt_model.trainNet(dataset, training_params)
Here is the full log of the error I got (notice that if I disable Tensorboard, everything trains perfectly):
Traceback (most recent call last):
File "train_model.py", line 384, in <module>
main()
File "train_model.py", line 373, in main
start_training(use_gpu)
File "train_model.py", line 235, in start_training
nmt_model.trainNet(dataset, training_params)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/cnn_model.py", line 923, in trainNet
self.__train(ds, params)
File "/WAVE/users/unix/nvtran/.local/lib/python3.7/site-packages/keras_wrapper/cnn_model.py", line 1040, in __train
callback_tensorboard.set_model(self.model)
File "/WAVE/users/unix/nvtran/keras/keras/callbacks/tensorboard_v1.py", line 199, in set_model
layer.output)
File "/WAVE/apps/eb/software/TensorFlow/1.14.0-fosscuda-2019a-Python-3.7.2/lib/python3.7/site-packages/tensorflow/python/summary/summary.py", line 179, in histogram
tag=tag, values=values, name=scope)
File "/WAVE/apps/eb/software/TensorFlow/1.14.0-fosscuda-2019a-Python-3.7.2/lib/python3.7/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 329, in histogram_summary
"HistogramSummary", tag=tag, values=values, name=name)
File "/WAVE/apps/eb/software/TensorFlow/1.14.0-fosscuda-2019a-Python-3.7.2/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 626, in _apply_op_helper
param_name=input_name)
File "/WAVE/apps/eb/software/TensorFlow/1.14.0-fosscuda-2019a-Python-3.7.2/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 60, in _SatisfiesTypeConstraint
", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'values' has DataType bool not in list of allowed values: float32, float64, int32, uint8, int16, int8, int64, bfloat16, uint16, float16, uint32, uint64
I am trying to train a seq2seq-attention model using 2 GPUs and would like to use Tensorboard for visualization. I followed the nmt-keras tutorial for Tensorboard but still got an error. Here is the code that I used:
Here is the full log of the error I got (notice that if I disable Tensorboard, everything trains perfectly):