thuiar / MMSA

MMSA is a unified framework for Multimodal Sentiment Analysis.
MIT License
685 stars 106 forks source link

Supplementary note about the results #10

Closed iyuge2 closed 3 years ago

iyuge2 commented 3 years ago

Hi,

The results listed in MMSA/results/result-stat.md were reproduced under the same tuning and running settings. First, we tried 50 sets of parameters for each model on the same dataset using grid search. Then the parameters with the best performance on the validation set were selected as the final ones.

Unfortunately, we lost the original parameters used in our paper when we re-ran all models and datasets. However, you can try the following parameters, which can achieve results comparable to or better than those of our AAAI 2021 work.

Best wishes! Thank you~

def __SELF_MM(self):
    """Return the hyper-parameter dictionary for the SELF_MM model.

    The result has two top-level entries: ``'commonParas'`` holds the
    settings that are not dataset-specific, and ``'datasetParas'`` maps each
    dataset name (``'mosi'``, ``'mosei'``, ``'sims'``) to its own complete
    parameter dict. ``self`` is not read. Every call builds fresh dicts, and
    the three per-dataset dicts are independent objects.
    """
    shared = dict(
        need_data_aligned=False,
        need_model_aligned=False,
        need_normalized=False,
        use_bert=True,
        use_finetune=True,
        save_labels=False,
        early_stop=8,
        update_epochs=4,
    )

    # 'mosi' is spelled out in full; the other datasets are expressed below
    # as copies of it with only the differing values overridden.
    mosi = dict(
        # original note: the batch_size of each epoch is
        # update_epochs * batch_size
        batch_size=16,
        learning_rate_bert=5e-5,
        learning_rate_audio=0.005,
        learning_rate_video=0.005,
        learning_rate_other=0.001,
        weight_decay_bert=0.001,
        weight_decay_audio=0.001,
        weight_decay_video=0.001,
        weight_decay_other=0.001,
        # feature subNets
        a_lstm_hidden_size=16,
        v_lstm_hidden_size=32,
        a_lstm_layers=1,
        v_lstm_layers=1,
        text_out=768,
        audio_out=16,
        video_out=32,
        a_lstm_dropout=0.0,
        v_lstm_dropout=0.0,
        t_bert_dropout=0.1,
        # post feature
        post_fusion_dim=128,
        post_text_dim=32,
        post_audio_dim=16,
        post_video_dim=32,
        post_fusion_dropout=0.0,
        post_text_dropout=0.1,
        post_audio_dropout=0.1,
        post_video_dropout=0.0,
        # res
        H=3.0,
    )

    # dict(base, **overrides) copies `base` and keeps its key order, so the
    # derived dicts list their keys in the same order as 'mosi'.
    mosei = dict(
        mosi,
        batch_size=32,
        learning_rate_video=1e-4,
        weight_decay_audio=0.0,
        weight_decay_video=0.0,
        weight_decay_other=0.01,
        post_fusion_dropout=0.1,
        post_text_dropout=0.0,
        post_audio_dropout=0.0,
    )

    sims = dict(
        mosi,
        batch_size=32,
        weight_decay_audio=0.01,
        weight_decay_video=0.01,
        v_lstm_hidden_size=64,
        post_text_dim=64,
        H=1.0,
    )

    return {
        'commonParas': shared,
        # dataset
        'datasetParas': {
            'mosi': mosi,
            'mosei': mosei,
            'sims': sims,
        },
    }