yassouali / CCT

:page_facing_up: Semi-Supervised Semantic Segmentation with Cross-Consistency Training (CVPR 2020).
https://yassouali.github.io/cct_page/
MIT License

Too high performance of semi-supervised learning #39

Closed: won-bae closed this issue 3 years ago

won-bae commented 3 years ago

Hi, thank you for sharing the code for such a great paper. When I was trying to reproduce semi-supervised learning with 1.5K pixel labels in Table 1, I got 72.0, which is considerably higher than the reported number of 69.4. I set use_weak_lables: false, supervised: false, semi: true, and epochs: 50. If my understanding of your paper is correct, this is the right setting for CCT 1.5K in Table 1. For reference, here is the whole config I used. Am I missing something here?

{
    "name": "CCT",
    "experim_name": "CCT",
    "n_gpu": 1,
    "n_labeled_examples": 1464,
    "diff_lrs": true,
    "ramp_up": 0.1,
    "unsupervised_w": 30,
    "ignore_index": 255,
    "lr_scheduler": "Poly",
    "use_weak_lables": false,
    "weakly_loss_w": 0.4,
    "pretrained": true,

    "model":{
        "supervised": false,
        "semi": true,
        "supervised_w": 1,

        "sup_loss": "CE",
        "un_loss": "MSE",

        "softmax_temp": 1,
        "aux_constraint": false,
        "aux_constraint_w": 1,
        "confidence_masking": false,
        "confidence_th": 0.5,

        "drop": 6,
        "drop_rate": 0.5,
        "spatial": true,

        "cutout": 6,
        "erase": 0.4,

        "vat": 2,
        "xi": 1e-6,
        "eps": 2.0,

        "context_masking": 2,
        "object_masking": 2,
        "feature_drop": 6,

        "feature_noise": 6,
        "uniform_range": 0.3
    },

    "optimizer": {
        "type": "SGD",
        "args":{
            "lr": 1e-2,
            "weight_decay": 1e-4,
            "momentum": 0.9
        }
    },

    "train_supervised": {
        "data_dir": "VOCtrainval_11-May-2012",
        "batch_size": 10,
        "crop_size": 320,
        "shuffle": true,
        "base_size": 400,
        "scale": true,
        "augment": true,
        "flip": true,
        "rotate": false,
        "blur": false,
        "split": "train_supervised",
        "num_workers": 8
    },

    "train_unsupervised": {
        "data_dir": "VOCtrainval_11-May-2012",
        "weak_labels_output": "pseudo_labels/result/pseudo_labels",
        "batch_size": 10,
        "crop_size": 320,
        "shuffle": true,
        "base_size": 400,
        "scale": true,
        "augment": true,
        "flip": true,
        "rotate": false,
        "blur": false,
        "split": "train_unsupervised",
        "num_workers": 8
    },

    "val_loader": {
        "data_dir": "VOCtrainval_11-May-2012",
        "batch_size": 1,
        "val": true,
        "split": "val",
        "shuffle": false,
        "num_workers": 4
    },

    "trainer": {
        "epochs": 50,
        "save_dir": "/home/whbae/scratch/CCT.tmp/1464_semi",
        "save_period": 5,

        "monitor": "max Mean_IoU",
        "early_stop": 10,

        "tensorboardX": true,
        "log_dir": "saved/",
        "log_per_iter": 20,

        "val": true,
        "val_per_epochs": 5
    }
}
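For context on two entries above: ramp_up: 0.1 and unsupervised_w: 30 control the schedule of the unsupervised loss weight, which is ramped up from zero over the first fraction of training so the consistency targets are not trusted too early, while the per-perturbation counts in the model block (drop, cutout, vat, context_masking, object_masking, feature_drop, feature_noise) set how many auxiliary decoders of each type are used. Below is a minimal sketch of an exponential ramp-up in the style of Laine & Aila (2017); the helper name and the iterations-per-epoch figure are illustrative assumptions, not taken from the repo, which implements its own equivalent schedule.

import math

def rampup_weight(current_iter, total_iters, ramp_up=0.1, unsupervised_w=30.0):
    # Exponential ramp-up (Laine & Aila, 2017): the weight rises from ~0
    # to `unsupervised_w` over the first `ramp_up` fraction of training.
    # Hypothetical helper for illustration only.
    rampup_iters = ramp_up * total_iters
    if current_iter >= rampup_iters:
        return unsupervised_w
    phase = 1.0 - current_iter / rampup_iters
    return unsupervised_w * math.exp(-5.0 * phase ** 2)

# Assumed: 50 epochs x 1000 iterations/epoch => the weight reaches 30
# after the first 5000 iterations (10% of training).
total = 50 * 1000
for it in (0, 2500, 5000):
    print(it, round(rampup_weight(it, total), 2))

Training itself is launched by passing a config like this to train.py (python train.py --config config.json, as in the repo README).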
yassouali commented 3 years ago

Hi @won-bae

Yes, you are correct. The paper's results were obtained with an older version of the code and libraries; with this repo, the performance is generally in the 70-72 range depending on the setup. We just didn't update the paper, since the reported results are not that different and can vary with different torch and torchvision versions.

won-bae commented 3 years ago

Thanks for the clarification. I ran several experiments with the same settings and confirmed that 72.0 is indeed an extreme case.