buanide opened this issue 4 months ago
We plan to support fine-tuning, but there is no specific ETA. If you mean training a downstream model with features from emotion2vec+ large, you can just extract the features and train the model.
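For reference, a minimal sketch of that workflow, assuming the FunASR AutoModel interface described in the emotion2vec README; the wav path, the embedding size, and the classifier head below are illustrative assumptions, not part of this repo:

import torch
import torch.nn as nn
from funasr import AutoModel

# Load emotion2vec+ large through FunASR (downloaded from ModelScope on first use).
model = AutoModel(model="iic/emotion2vec_plus_large")

def extract_embedding(wav_path):
    # granularity="utterance" pools one vector per file; extract_embedding=True
    # exposes it under "feats" (check res[0]["feats"].shape for the actual dimension).
    res = model.generate(wav_path, granularity="utterance", extract_embedding=True)
    return res[0]["feats"]

# Hypothetical downstream head trained on the frozen embeddings
# (1024 is an assumed embedding size for the large model; adjust to the real shape).
classifier = nn.Sequential(nn.Linear(1024, 256), nn.ReLU(), nn.Linear(256, 4))
optimizer = torch.optim.Adam(classifier.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()

def train_step(emb_batch, label_batch):
    # emb_batch: (B, 1024) float tensor of embeddings; label_batch: (B,) class indices.
    optimizer.zero_grad()
    loss = criterion(classifier(emb_batch), label_batch)
    loss.backward()
    optimizer.step()
    return loss.item()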
Thank you for your answer. Which class should I instantiate to train the emotion2vec+ large model?
Hi, I have tried to use the same process as you did for IEMOCAP, but with EmoDB, and I got very poor results: Average WA: 34.2156862745098%; UA: 28.42471764346764%; F1: 29.36723608865055%. I extracted the features like this:
python extract_features.py --data . --model /home/stage2024/app/codes/models/emotion2vec/upstream --split file_paths --checkpoint /home/stage2024/app/codes/models/emotion2vec_base/emotion2vec_base.pt --save-dir . --layer 11
Could you have a look at my code, please?
I used cross-validation with an 80%/10%/10% split, as recommended in the paper:
import os
import sys
import logging
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import KFold, train_test_split
import hydra
from omegaconf import DictConfig

from data import SpeechDataset

# Make the project root importable before loading the project modules
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../../'))
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

from data import load_ssl_features
from model import BaseModel
from utils import train_one_epoch, validate_and_test
from codes.process import labels_num_EMODB, load_EmoDB
def extract_features_and_labels(indices, feats, sizes, offsets, labels):
    feats_list = []
    sizes_list = []
    offsets_list = []
    labels_list = []
    for idx in indices:
        start = offsets[idx]
        end = start + sizes[idx]
        feats_list.append(feats[start:end, :])
        sizes_list.append(sizes[idx])
        offsets_list.append(0 if len(offsets_list) == 0 else offsets_list[-1] + sizes_list[-2])
        labels_list.append(labels[idx])
    feats = np.concatenate(feats_list, axis=0)
    sizes = np.array(sizes_list)
    offsets = np.array(offsets_list)
    labels = labels_list
    return feats, sizes, offsets, labels
def create_data_loaders(dataset, train_indices, test_indices, val_indices, batch_size):
    feats = dataset["feats"]
    sizes = dataset["sizes"]
    offsets = dataset["offsets"]
    labels = dataset["labels"]
    # Extracting training dataset features
    train_feats, train_sizes, train_offsets, train_labels = extract_features_and_labels(train_indices, feats, sizes, offsets, labels)
    train_dataset = SpeechDataset(feats=train_feats, sizes=train_sizes, offsets=train_offsets, labels=train_labels)
    # Extracting validation dataset features
    val_feats, val_sizes, val_offsets, val_labels = extract_features_and_labels(val_indices, feats, sizes, offsets, labels)
    val_dataset = SpeechDataset(feats=val_feats, sizes=val_sizes, offsets=val_offsets, labels=val_labels)
    # Extracting test dataset features
    test_feats, test_sizes, test_offsets, test_labels = extract_features_and_labels(test_indices, feats, sizes, offsets, labels)
    test_dataset = SpeechDataset(feats=test_feats, sizes=test_sizes, offsets=test_offsets, labels=test_labels)
    # Creating DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=train_dataset.collator, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, collate_fn=val_dataset.collator, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=test_dataset.collator, shuffle=False)
    return train_loader, val_loader, test_loader
@hydra.main(config_path='config', config_name='default.yaml')
def train_data(cfg: DictConfig):
    # print(cfg.common.seed)
    # torch.manual_seed(cfg.common.seed)
    label_dict = {
        "anger": 0,
        "happiness": 1,
        "sadness": 2,
        "neutral": 3
    }
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.cuda.empty_cache()
    data_EMODB = load_EmoDB(cfg.dataset.database_path)
    dataset = load_ssl_features(data_EMODB, cfg.dataset.feat_path, cfg.dataset.feat_length, label_dict)
    kfold = KFold(n_splits=10, shuffle=True, random_state=cfg.common.seed)
    test_wa_avg, test_ua_avg, test_f1_avg = 0., 0., 0.
    for fold, (train_indices, test_indices) in enumerate(kfold.split(np.arange(len(dataset["labels"])))):
        print(f"------Now it's {fold+1}th fold------")
        train_indices_fold, val_indices = train_test_split(train_indices, test_size=0.111, random_state=cfg.common.seed)
        train_loader, val_loader, test_loader = create_data_loaders(dataset, train_indices_fold, test_indices, val_indices, cfg.dataset.batch_size)

        model = BaseModel(input_dim=768, output_dim=len(label_dict))
        model = model.to(device)
        optimizer = optim.RMSprop(model.parameters(), lr=cfg.optimization.lr, momentum=0.9)
        scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=cfg.optimization.lr, max_lr=1e-3, step_size_up=10)
        criterion = nn.CrossEntropyLoss()

        best_val_wa = 0
        best_val_wa_epoch = 0
        save_dir = os.path.join(str(Path.cwd()), f"model_{fold+1}.pth")
        for epoch in range(cfg.optimization.epoch):  # Adjust the number of epochs as per your requirement
            train_loss = train_one_epoch(model, optimizer, criterion, train_loader, device)
            scheduler.step()
            val_wa, val_ua, val_f1 = validate_and_test(model, val_loader, device, num_classes=len(label_dict))
            if val_wa > best_val_wa:
                best_val_wa = val_wa
                best_val_wa_epoch = epoch
                torch.save(model.state_dict(), save_dir)
            print(f"Epoch {epoch+1}, Training Loss: {train_loss/len(train_loader):.6f}, Validation WA: {val_wa:.2f}%; UA: {val_ua:.2f}%; F1: {val_f1:.2f}%")

        ckpt = torch.load(save_dir)
        model.load_state_dict(ckpt, strict=True)
        test_wa, test_ua, test_f1 = validate_and_test(model, test_loader, device, num_classes=len(label_dict))
        print(f"The {fold+1}th Fold at epoch {best_val_wa_epoch + 1}, test WA {test_wa}%; UA {test_ua}%; F1 {test_f1}%")
        test_wa_avg += test_wa
        test_ua_avg += test_ua
        test_f1_avg += test_f1
    print(f"Average WA: {test_wa_avg/10}%; UA: {test_ua_avg/10}%; F1: {test_f1_avg/10}%")


if __name__ == "__main__":
    train_data()
Maybe emotion2vec_base is a better choice for feature representation than the emotion2vec_plus series.
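For reference, a hedged sketch of how emotion2vec_base frame-level features could also be dumped through FunASR (as an alternative to the fairseq extract_features.py route above); the model id and result keys are assumed from the FunASR/ModelScope docs:

from funasr import AutoModel

# Assumption: "iic/emotion2vec_base" is the ModelScope id of the base model.
model = AutoModel(model="iic/emotion2vec_base")

# granularity="frame" keeps the per-frame 768-d features instead of the pooled utterance vector.
res = model.generate("example.wav", granularity="frame", extract_embedding=True)
frame_feats = res[0]["feats"]  # expected shape: (num_frames, 768)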
How can I fine-tune the emotion2vec+ large model on another dataset without using the process you used for IEMOCAP?
I have tried to use four features and your bash script train.sh, but I got this error:
File "C:\Users\doki_engbu\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\spawn.py", line 122, in spawn_main exitcode = _main(fd, parent_sentinel) ^^^^^^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\doki_engbu\AppData\Local\Programs\Python\Python311\Lib\multiprocessing\spawn.py", line 132, in _main self = reduction.pickle.load(from_parent) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ _pickle.UnpicklingError: pickle data was truncated