materialsvirtuallab / megnet

Graph Networks as a Universal Machine Learning Framework for Molecules and Crystals
BSD 3-Clause "New" or "Revised" License
497 stars 155 forks source link

List index out of range while training the NN #370

Open FranK1308 opened 1 year ago

FranK1308 commented 1 year ago

Hi, in step #5 (model training), the following error occurs:

IndexError                                Traceback (most recent call last)
Input In [69], in <cell line: 3>()
      1 # 5. Model training
      2 callbacks = [ReduceLRUponNan(patience=500), ManualStop()]
----> 3 model.train_from_graphs(
      4     train_graphs, train_targets, val_graphs, val_targets, epochs=EPOCHS, verbose=2, initial_epoch=0, callbacks=callbacks
      5 )

File ~\anaconda3\envs\ML2\lib\site-packages\megnet\models\base.py:226, in GraphModel.train_from_graphs(self, train_graphs, train_targets, validation_graphs, validation_targets, sample_weights, epochs, batch_size, verbose, callbacks, prev_model, lr_scaling_factor, patience, save_checkpoint, automatic_correction, dirname, **kwargs)
    224 train_generator = self._create_generator(train_inputs, sample_weights=sample_weights, batch_size=batch_size)
    225 steps_per_train = int(np.ceil(len(train_graphs) / batch_size))
--> 226 self.fit(
    227     train_generator,
    228     steps_per_epoch=steps_per_train,
    229     validation_data=val_generator,
    230     validation_steps=steps_per_val,
    231     epochs=epochs,
    232     verbose=verbose,
    233     callbacks=callbacks,
    234     **kwargs,
    235 )
    236 return self

File ~\anaconda3\envs\ML2\lib\site-packages\keras\engine\training_v1.py:855, in Model.fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    852 self._check_call_args("fit")
    854 func = self._select_training_loop(x)
--> 855 return func.fit(
    856     self,
    857     x=x,
    858     y=y,
    859     batch_size=batch_size,
    860     epochs=epochs,
    861     verbose=verbose,
    862     callbacks=callbacks,
    863     validation_split=validation_split,
    864     validation_data=validation_data,
    865     shuffle=shuffle,
    866     class_weight=class_weight,
    867     sample_weight=sample_weight,
    868     initial_epoch=initial_epoch,
    869     steps_per_epoch=steps_per_epoch,
    870     validation_steps=validation_steps,
    871     validation_freq=validation_freq,
    872     max_queue_size=max_queue_size,
    873     workers=workers,
    874     use_multiprocessing=use_multiprocessing,
    875 )

File ~\anaconda3\envs\ML2\lib\site-packages\keras\engine\training_generator_v1.py:648, in GeneratorOrSequenceTrainingLoop.fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing)
    644 model._validate_or_infer_batch_size(batch_size, steps_per_epoch, x)
    645 training_utils_v1.check_generator_arguments(
    646     y, sample_weight, validation_split=validation_split
    647 )
--> 648 return fit_generator(
    649     model,
    650     x,
    651     steps_per_epoch=steps_per_epoch,
    652     epochs=epochs,
    653     verbose=verbose,
    654     callbacks=callbacks,
    655     validation_data=validation_data,
    656     validation_steps=validation_steps,
    657     validation_freq=validation_freq,
    658     class_weight=class_weight,
    659     max_queue_size=max_queue_size,
    660     workers=workers,
    661     use_multiprocessing=use_multiprocessing,
    662     shuffle=shuffle,
    663     initial_epoch=initial_epoch,
    664     steps_name="steps_per_epoch",
    665 )

File ~\anaconda3\envs\ML2\lib\site-packages\keras\engine\training_generator_v1.py:351, in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
    345 epoch_logs = cbks.make_logs(
    346     model, epoch_logs, val_results, mode, prefix="val_"
    347 )
    349 if mode == ModeKeys.TRAIN:
    350     # Epochs only apply to fit.
--> 351     callbacks.on_epoch_end(epoch, epoch_logs)
    353 # Recreate dataset iterator for the next epoch.
    354 if reset_dataset_after_each_epoch and epoch < epochs - 1:

File ~\anaconda3\envs\ML2\lib\site-packages\keras\callbacks.py:448, in CallbackList.on_epoch_end(self, epoch, logs)
    446 logs = self._process_logs(logs)
    447 for callback in self.callbacks:
--> 448     callback.on_epoch_end(epoch, logs)

File ~\anaconda3\envs\ML2\lib\site-packages\megnet\callbacks.py:234, in ReduceLRUponNan.on_epoch_end(self, epoch, logs)
    232 logs = logs or {}
    233 loss = logs.get("loss")
--> 234 last_saved_epoch, last_metric, last_file = self._get_checkpoints()
    235 if last_saved_epoch is not None:
    236     if last_saved_epoch + self.patience <= epoch:

File ~\anaconda3\envs\ML2\lib\site-packages\megnet\callbacks.py:287, in ReduceLRUponNan._get_checkpoints(self)
    285 epochs = []
    286 for i in all_check_points:
--> 287     metrics = re.findall(file_pattern, i)[0]
    288     metric_values.append(float(metrics[metric_index]))
    289     epochs.append(int(metrics[epoch_index]))

IndexError: list index out of range