coqui-ai / TTS

🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production
http://coqui.ai
Mozilla Public License 2.0

[Bug] Segmentation fault (core dumped) when training tacotron2 #1492

Closed: kin0303 closed this issue 2 years ago

kin0303 commented 2 years ago

I was training the Tacotron2-DDC model on the LJSpeech dataset. Training ran for a while and then stopped with: Segmentation fault (core dumped). Here is my training script:

import os

# Trainer: Where the ✨️ happens.
# TrainingArgs: Defines the set of arguments of the Trainer.
from trainer import Trainer, TrainerArgs

# Tacotron2Config: all model related values for training, validating and testing.
from TTS.tts.configs.tacotron2_config import Tacotron2Config

# BaseDatasetConfig: defines name, formatter and path of the dataset.
from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.tacotron2 import Tacotron2
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.config.shared_configs import BaseAudioConfig

# We use the same folder as this script as our training folder.
output_path = os.path.dirname(os.path.abspath(__file__))

# DEFINE DATASET CONFIG
# Set LJSpeech as our target dataset and define its path.
# You can also use a simple Dict to define the dataset and pass it to your custom formatter.
dataset_config = BaseDatasetConfig(
    name="ljspeech", meta_file_train="metadata.csv", path=os.path.join(output_path, "../LJSpeech-1.1/")
)

# INITIALIZE THE TRAINING CONFIGURATION
# Configure the model. Every config class inherits the BaseTTSConfig.
audio_config = BaseAudioConfig(
    sample_rate=22050,
    do_trim_silence=True,
    trim_db=60.0,
    signal_norm=False,
    mel_fmin=0.0,
    mel_fmax=8000,
    spec_gain=1.0,
    log_func="np.log",
    ref_level_db=20,
    preemphasis=0.0,
)

config = Tacotron2Config(  # This is the config that is saved for future use
    audio=audio_config,
    batch_size=4,
    eval_batch_size=4,
    num_loader_workers=2,
    num_eval_loader_workers=2,
    run_eval=True,
    test_delay_epochs=-1,
    r=6,
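    # Each gradual_training entry is [first_step, r, batch_size]; the
    # reduction factor r steps down 6 -> 4 -> 3 -> 2 as training progresses.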
    gradual_training=[[0, 6, 4], [10000, 4, 4], [50000, 3, 4], [100000, 2, 4]],
    double_decoder_consistency=True,
    epochs=1000,
    text_cleaner="phoneme_cleaners",
    use_phonemes=True,
    phoneme_language="en-us",
    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
    print_step=10,
    print_eval=True,
    mixed_precision=False,
    output_path=output_path,
    datasets=[dataset_config],
)

# INITIALIZE THE AUDIO PROCESSOR
# Audio processor is used for feature extraction and audio I/O.
# It mainly serves to the dataloader and the training loggers.
ap = AudioProcessor.init_from_config(config)

# INITIALIZE THE TOKENIZER
# Tokenizer is used to convert text to sequences of token IDs.
# If characters are not defined in the config, default characters are passed to the config
tokenizer, config = TTSTokenizer.init_from_config(config)

# LOAD DATA SAMPLES
# Each sample is a dict like ```{"text": ..., "audio_file": ..., "speaker_name": ...}```
# You can define your custom sample loader returning the list of samples.
# Or define your custom formatter and pass it to the `load_tts_samples`.
# Check `TTS.tts.datasets.load_tts_samples` for more details.

# custom formatter implementation
def formatter(root_path, manifest_file, **kwargs):  # pylint: disable=unused-argument
    """Assumes each line as ```<filename>|<transcription>```
    """
    txt_file = os.path.join(root_path, manifest_file)
    items = []
    speaker_name = "my_speaker"
    with open(txt_file, "r", encoding="utf-8") as ttf:
        for line in ttf:
            cols = line.strip().split("|")  # strip the trailing newline before splitting

            #wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
            wav_file = "/media/DATA-2/TTS/coqui/LJSpeech-1.1/wavs/" + cols[0] + ".wav"
            text = cols[1]
            items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name})
    return items

# load training samples
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, formatter=formatter)
# INITIALIZE THE MODEL
# Models take a config object and a speaker manager as input
# Config defines the details of the model like the number of layers, the size of the embedding, etc.
# Speaker manager is used by multi-speaker models.
print("================== train_samples ========================")
print("len data : ", len(train_samples))
print(train_samples)

print("======================== eval_samples ================")
print("len data : ", len(eval_samples))
print(eval_samples)

model = Tacotron2(config, ap, tokenizer, speaker_manager=None)

# INITIALIZE THE TRAINER
# Trainer provides a generic API to train all the 🐸TTS models with all its perks like mixed-precision training,
# distributed training, etc.
trainer = Trainer(
    TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
)

# AND... 3,2,1... 🚀
trainer.fit()
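
An editor's hedged aside (not part of the original report): if training needs to be resumed after a crash like this one, the trainer can restore model weights from a saved checkpoint. `restore_path` is a `TrainerArgs` field in the trainer package used here, but treat the exact field and the checkpoint filename below as assumptions for this version.

# Hedged sketch: restoring weights from a saved checkpoint before continuing.
# The run folder and checkpoint name are hypothetical.
trainer = Trainer(
    TrainerArgs(restore_path=os.path.join(output_path, "run-April-14-2022_09+10AM-0cf3265a", "checkpoint_5000.pth")),
    config,
    output_path,
    model=model,
    train_samples=train_samples,
    eval_samples=eval_samples,
)
trainer.fit()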

The output was as follows:

--> STEP: 647/2162 -- GLOBAL_STEP: 647
| > decoder_loss: 35.02273 (33.81891)
| > postnet_loss: 37.02569 (35.82565)
| > stopnet_loss: 0.82287 (0.85986)
| > decoder_coarse_loss: 35.01795 (33.80500)
| > decoder_ddc_loss: 0.00264 (0.00408)
| > ga_loss: 0.00451 (0.00664)
| > decoder_diff_spec_loss: 0.42732 (0.43585)
| > postnet_diff_spec_loss: 4.44786 (4.47058)
| > decoder_ssim_loss: 0.99999 (0.99978)
| > postnet_ssim_loss: 0.99983 (0.99947)
| > loss: 29.33145 (28.48289)
| > align_error: 0.98594 (0.97906)
| > grad_norm: 3.32803 (3.73880)
| > current_lr: 0.00000
| > step_time: 0.59430 (0.45785)
| > loader_time: 0.00150 (0.00150)

Segmentation fault (core dumped)
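
An editor's hedged note: a bare "Segmentation fault (core dumped)" gives no Python traceback because the crash happens in native code. Enabling the standard-library faulthandler at the top of the training script makes the interpreter dump the Python stack when it receives SIGSEGV, which usually narrows the crash down to a specific call.

# Hedged diagnostic sketch: dump Python tracebacks on fatal signals.
# Put this at the very top of the training script, before heavy imports.
import faulthandler
faulthandler.enable()  # prints the Python stack on SIGSEGV/SIGFPE/SIGABRT/SIGBUS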

Environment

{ "CUDA": { "GPU": [ "NVIDIA GeForce GTX 1660 Ti" ], "available": true, "version": "10.2" }, "Packages": { "PyTorch_debug": false, "PyTorch_version": "1.11.0+cu102", "TTS": "0.6.1", "numpy": "1.19.5" }, "System": { "OS": "Linux", "architecture": [ "64bit", "ELF" ], "processor": "x86_64", "python": "3.8.0", "version": "#118~18.04.1-Ubuntu SMP Thu Mar 3 13:53:15 UTC 2022" } }

kin0303 commented 2 years ago

I tried running it again, and the error is:

RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`
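
An editor's hedged note: CUDA kernel launches are asynchronous, so a cuBLAS execution failure like this one can be reported on a later, unrelated Python line. Forcing synchronous launches makes the traceback point at the op that actually failed; the variable must be set before PyTorch initializes CUDA.

# Hedged diagnostic sketch: make CUDA errors surface at the failing call.
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # set before torch creates a CUDA context

import torch  # import (and do all CUDA work) only after setting the variable
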
a-froghyar commented 2 years ago

There's an error in the Tacotron models that is being fixed by https://github.com/coqui-ai/TTS/pull/977/commits/27825d815044f75c2c8b43177a5aa21f47b33bae. I'd suggest re-running the training after that PR has been merged.

kin0303 commented 2 years ago

There's an error in the Tacotron models that is being fixed by 27825d8. I'd suggest re-running the training after that PR has been merged.

Still getting an error:

--> STEP: 2027/3243 -- GLOBAL_STEP: 5270
     | > decoder_loss: 34.25191  (34.01939)
     | > postnet_loss: 36.28823  (36.02014)
     | > stopnet_loss: 0.70222  (0.75120)
     | > decoder_coarse_loss: 34.14155  (33.93135)
     | > decoder_ddc_loss: 0.00166  (0.00278)
     | > ga_loss: 0.00311  (0.00502)
     | > decoder_diff_spec_loss: 0.44446  (0.43518)
     | > postnet_diff_spec_loss: 4.45212  (4.42896)
     | > decoder_ssim_loss: 0.99998  (0.99993)
     | > postnet_ssim_loss: 0.99982  (0.99966)
     | > loss: 28.61271  (28.48565)
     | > align_error: 0.98953  (0.98365)
     | > grad_norm: 3.22861  (3.79975)
     | > current_lr: 0.00000 
     | > step_time: 0.71590  (0.44813)
     | > loader_time: 0.00150  (0.00726)

 ! Run is kept in /media/DATA-2/TTS/coqui/TTS/run-April-14-2022_09+10AM-0cf3265a
Traceback (most recent call last):
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/trainer/trainer.py", line 1461, in fit
    self._fit()
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/trainer/trainer.py", line 1445, in _fit
    self.train_epoch()
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/trainer/trainer.py", line 1224, in train_epoch
    _, _ = self.train_step(batch, batch_num_steps, cur_step, loader_start_time)
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/trainer/trainer.py", line 1057, in train_step
    outputs, loss_dict_new, step_time = self._optimize(
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/trainer/trainer.py", line 946, in _optimize
    outputs, loss_dict = self._model_train_step(batch, model, criterion)
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/trainer/trainer.py", line 902, in _model_train_step
    return model.train_step(*input_args)
  File "/media/DATA-2/TTS/coqui/TTS/TTS/tts/models/tacotron2.py", line 279, in train_step
    outputs = self.forward(text_input, text_lengths, mel_input, mel_lengths, aux_input)
  File "/media/DATA-2/TTS/coqui/TTS/TTS/tts/models/tacotron2.py", line 172, in forward
    decoder_outputs, alignments, stop_tokens = self.decoder(encoder_outputs, mel_specs, input_mask)
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/media/DATA-2/TTS/coqui/TTS/TTS/tts/layers/tacotron/tacotron2.py", line 321, in forward
    decoder_output, attention_weights, stop_token = self.decode(memory)
  File "/media/DATA-2/TTS/coqui/TTS/TTS/tts/layers/tacotron/tacotron2.py", line 277, in decode
    self.decoder_hidden, self.decoder_cell = self.decoder_rnn(
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/media/DATA-2/TTS/coqui/tts_coqui/lib/python3.8/site-packages/torch/nn/modules/rnn.py", line 1179, in forward
    ret = _VF.lstm_cell(
RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`
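
An editor's hedged note: the traceback ends inside torch's LSTMCell, whose matrix multiplies go through cuBLAS. A minimal standalone LSTMCell on the same GPU can help distinguish a model- or data-specific problem from a GPU/driver-level one; the layer sizes below are arbitrary, not Tacotron2's.

# Hedged isolation sketch: exercise a bare LSTMCell on the GPU.
import torch

cell = torch.nn.LSTMCell(256, 512).cuda()
x = torch.randn(8, 256, device="cuda")
state = (torch.randn(8, 512, device="cuda"), torch.randn(8, 512, device="cuda"))
h, c = cell(x, state)
torch.cuda.synchronize()  # force any asynchronous CUDA error to surface here
print("bare LSTMCell ran cleanly:", h.shape, c.shape)
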
stale[bot] commented 2 years ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions. You might also look at our discussion channels.