Closed Jamiroquai88 closed 8 months ago
Would you mind sharing train3.0_powerset.py?
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2022
# Author: Jan Profant <jan.profant@rev.com>
# All Rights Reserved
import argparse
import sys
import torch
from pytorch_lightning.cli import ReduceLROnPlateau
from torch.optim.lr_scheduler import StepLR, LambdaLR
from pyannote.audio import Pipeline, Model
from pyannote.database import FileFinder, registry
from pyannote.audio.models.segmentation import PyanNet, SSeRiouSS
from pyannote.audio.tasks import Segmentation
from types import MethodType
from torch.optim import Adam
from pytorch_lightning.callbacks import (
EarlyStopping,
ModelCheckpoint,
RichProgressBar,
)
from pytorch_lightning.loggers import WandbLogger
torch.set_float32_matmul_precision('high')
def configure_optimizers(self):
    """Return an Adam optimizer with an epoch-wise exponential LR decay.

    This function is monkey-patched onto the model instance via
    ``MethodType`` in ``__main__``; it reads the module-level global ``lr``
    (set from the ``--lr`` CLI argument) rather than a parameter of its own.

    Returns:
        A ``([optimizers], [schedulers])`` pair, the format accepted by
        PyTorch Lightning's ``configure_optimizers`` hook.
    """
    optimizer = Adam(self.parameters(), lr=lr)
    # Multiplier applied to the *base* LR (not chained): 0.7**epoch for the
    # first 5 epochs, then the slightly slower 0.75**epoch schedule.
    scheduler = LambdaLR(
        optimizer,
        lr_lambda=lambda epoch: 0.7 ** epoch if epoch < 5 else 0.75 ** epoch,
        verbose=True,
    )
    return [optimizer], [scheduler]
if __name__ == '__main__':
    # Train a pyannote powerset segmentation model from the command line.
    parser = argparse.ArgumentParser()
    parser.add_argument('--database', required=True,
                        help='path to database.yaml file')
    parser.add_argument('--wandb-project', type=str, required=True,
                        help='wandb project name for the logging')
    parser.add_argument('--use-pretrained', default=False, action='store_true',
                        help='load pretrained model from huggingface hub')
    parser.add_argument('--init-model', required=False, help='path to model to initialize the NN')
    parser.add_argument('--workers', type=int, default=8)
    # model options
    parser.add_argument('--chunk-dur', default=10.0, type=float,
                        help='LSTM chunk duration')
    parser.add_argument('--lr', default=1e-3, type=float,
                        help='Adam learning rate')
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--batch-size', type=int, default=64)
    parser.add_argument('--wavlm-type', type=str, default='WAVLM_BASE',
                        choices=['WAVLM_BASE', 'WAVLM_BASE_PLUS', 'WAVLM_LARGE'])
    # LSTM options
    parser.add_argument('--lstm-type', choices=['pyannet', 'sseriouss'], default='pyannet')
    parser.add_argument('--lstm-hidden-size', type=int, default=256)
    parser.add_argument('--lstm-dropout', type=float, default=0.0)
    parser.add_argument('--lstm-num-layers', type=int, default=2)
    args = parser.parse_args()

    # Load the protocol definition and resolve audio paths on disk.
    registry.load_database(args.database)
    dataset = registry.get_protocol('audiodb.SpeakerDiarization.train_protocol',
                                    preprocessors={'audio': FileFinder()})

    # Powerset segmentation task: fixed-duration chunks with at most
    # 3 speakers per chunk and 2 simultaneous speakers per frame.
    task = Segmentation(
        dataset,
        duration=args.chunk_dur,
        max_speakers_per_chunk=3,
        max_speakers_per_frame=2,
        batch_size=args.batch_size,
        num_workers=args.workers)

    # Model selection: pretrained hub model, local checkpoint, or a fresh
    # architecture — the first two are mutually exclusive with each other.
    if args.use_pretrained and args.init_model:
        print(f'Can\'t load a pretrained model and at the same time initialize, fix your arguments.')
        sys.exit(1)
    elif args.use_pretrained:
        # NOTE(review): "XXXXX" is a placeholder — a real HuggingFace auth
        # token must be supplied here (better: read it from the environment).
        model = Model.from_pretrained(
            "pyannote/segmentation-3.0",
            use_auth_token="XXXXX")
    elif args.init_model:
        model = Model.from_pretrained(args.init_model)
    else:
        # LSTM hyper-parameters shared by both architectures.
        LSTM = {
            "hidden_size": args.lstm_hidden_size,
            "num_layers": args.lstm_num_layers,
            "bidirectional": True,
            "monolithic": True,
            "dropout": args.lstm_dropout,
        }
        if args.lstm_type == 'pyannet':
            model = PyanNet(task=task, lstm=LSTM)
        elif args.lstm_type == 'sseriouss':
            model = SSeRiouSS(wav2vec=args.wavlm_type, task=task, lstm=LSTM)
        else:
            raise NotImplementedError("Unsupported LSTM type")

    # `lr` is read as a module-level global by configure_optimizers, which is
    # patched onto this specific model instance below.
    lr = args.lr
    model.configure_optimizers = MethodType(configure_optimizers, model)
    model.task = task

    # Save a checkpoint every epoch (full training state, not weights only).
    checkpoint = ModelCheckpoint(
        every_n_epochs=1,
        save_last=True,
        save_weights_only=False,
        filename="{epoch}",
        verbose=True
    )
    callbacks = [RichProgressBar(), checkpoint]
    # we train for at most 20 epochs
    from pytorch_lightning import Trainer
    wandb_logger = WandbLogger(log_model="all", project=args.wandb_project)
    trainer = Trainer(accelerator="gpu",
                      callbacks=callbacks,
                      max_epochs=args.epochs,
                      gradient_clip_val=0.5,
                      num_sanity_val_steps=10,
                      logger=wandb_logger,
                      strategy='ddp_find_unused_parameters_true',
                      )
    # NOTE(review): when --init-model is given, the same path is used both
    # for Model.from_pretrained above and as Lightning's ckpt_path here —
    # confirm this double use is intentional (ckpt_path resumes full trainer
    # state, not just weights).
    trainer.fit(model, ckpt_path=args.init_model)
I think this problem has been fixed in the develop
branch since https://github.com/pyannote/pyannote-audio/commit/c0b9e79aa8063c7ddc78e7213799d0aeae9d3d10
Could you try with the latest commit?
That was it, thank you. Closing now.
Tested versions
3.1.1
System information
ubuntu 20.04, 2xGPU A100
Issue description
Hello Hervé,
I am having issues with multi-GPU training that I am not sure how to solve. I would appreciate some feedback.
This is how I run the script on two GPUs:
To my `Trainer` setup I added (and removed) some options, based on the PyTorch Lightning docs.
But I am getting an error, which seems true (except that `Model` does have `example_output`).
A very similar thing happens with the `SSeRiouSS` model, but with that one I am getting a different error.
I tried to move some tensors to GPU via `.to(self.device)` — but that is not advised in PyTorch Lightning. They recommend `.as_type(x)`, which unfortunately always leads to the same `example_output` error.
When using a single GPU and keeping `model = model.cuda()`, it works fine for `SSeRiouSS` but not for `PyanNet`.
I am willing to contribute to this, I would just need some pointers first. Thank you!
Minimal reproduction example (MRE)
can't share my data, sorry