facebookresearch / detectron2

Detectron2 is a platform for object detection, segmentation and other visual recognition tasks.
https://detectron2.readthedocs.io/en/latest/
Apache License 2.0
30.55k stars 7.49k forks source link

I do augmentation but the number of data is not increased #3417

Closed leesangjoon1 closed 3 years ago

leesangjoon1 commented 3 years ago

If you do not know the root cause of the problem, please post according to this template:

Instructions To Reproduce the Issue:

Check https://stackoverflow.com/help/minimal-reproducible-example for how to ask good questions. Simplify the steps to reproduce the issue using suggestions from the above link, and provide them below:

  1. Full runnable code or full changes you made:
    
    If making changes to the project itself, please use output of the following command:
    git rev-parse HEAD; git diff
import itertools import logging import os from collections import OrderedDict import torch import detectron2.utils.comm as comm from detectron2.checkpoint import DetectionCheckpointer from detectron2.config import get_cfg from detectron2.data import MetadataCatalog from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch from detectron2.evaluation import ( CityscapesInstanceEvaluator, CityscapesSemSegEvaluator, COCOEvaluator, COCOPanopticEvaluator, DatasetEvaluators, LVISEvaluator, PascalVOCDetectionEvaluator, SemSegEvaluator, verify_results, ) from detectron2.modeling import GeneralizedRCNNWithTTA from detectron2.solver.build import maybe_add_gradient_clipping, get_default_optimizer_params from detectron2.data import transforms as T from swint import add_swint_config from detectron2.data.build import build_detection_test_loader, build_detection_train_loader from detectron2.data import DatasetMapper from detectron2.data.datasets import register_coco_instances register_coco_instances("data_train", {}, "/home/sangjoon/coco-annotator/mosquito_train_white_1_added_images.json", "/home/sangjoon/coco-annotator/datasets/mosquito_train_white_sharpen_filter_0826") register_coco_instances("data_val", {}, "/home/sangjoon/coco-annotator/mosquito_test_white_1_added_images.json", "/home/sangjoon/coco-annotator/datasets/mosquito_test_white_sharpen_filter_0826") def build_sem_seg_train_aug(cfg): augs = [ T.ResizeShortestEdge( cfg.INPUT.MIN_SIZE_TRAIN, cfg.INPUT.MAX_SIZE_TRAIN, cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING ) ] augs.append(T.RandomFlip(prob=0.5)) augs.append(T.RandomBrightness(0.9,1.1)) augs.append(T.RandomCrop('absolute', (640, 640))) return augs class Trainer(DefaultTrainer): """ We use the "DefaultTrainer" which contains pre-defined default logic for standard training workflow. They may not work for you, especially if you are working on a new research project. In that case you can write your own training loop. 
You can use "tools/plain_train_net.py" as an example. """ @classmethod def build_evaluator(cls, cfg, dataset_name, output_folder=None): """ Create evaluator(s) for a given dataset. This uses the special metadata "evaluator_type" associated with each builtin dataset. For your own dataset, you can simply create an evaluator manually in your script and do not have to worry about the hacky if-else logic here. """ if output_folder is None: output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") evaluator_list = [] evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: evaluator_list.append( SemSegEvaluator( dataset_name, distributed=True, num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, output_dir=output_folder, ) ) if evaluator_type in ["coco", "coco_panoptic_seg"]: evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) if evaluator_type == "coco_panoptic_seg": evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) if evaluator_type == "cityscapes_instance": assert ( torch.cuda.device_count() >= comm.get_rank() ), "CityscapesEvaluator currently do not work with multiple machines." return CityscapesInstanceEvaluator(dataset_name) if evaluator_type == "cityscapes_sem_seg": assert ( torch.cuda.device_count() >= comm.get_rank() ), "CityscapesEvaluator currently do not work with multiple machines." 
return CityscapesSemSegEvaluator(dataset_name) elif evaluator_type == "pascal_voc": return PascalVOCDetectionEvaluator(dataset_name) elif evaluator_type == "lvis": return LVISEvaluator(dataset_name, cfg, True, output_folder) if len(evaluator_list) == 0: raise NotImplementedError( "no Evaluator for the dataset {} with the type {}".format( dataset_name, evaluator_type ) ) elif len(evaluator_list) == 1: return evaluator_list[0] return DatasetEvaluators(evaluator_list) # @classmethod # def build_test_loader(cls, cfg, dataset_name): # return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False)) @classmethod def build_train_loader(cls, cfg): if "GeneralizedRCNN" in cfg.MODEL.META_ARCHITECTURE: mapper = DatasetMapper(cfg, is_train=True, augmentations=build_sem_seg_train_aug(cfg)) else: mapper = None return build_detection_train_loader(cfg, mapper=mapper) @classmethod def test_with_TTA(cls, cfg, model): logger = logging.getLogger("detectron2.trainer") # In the end of training, run an evaluation with TTA # Only support some R-CNN models. 
logger.info("Running inference with test-time augmentation ...") model = GeneralizedRCNNWithTTA(cfg, model) evaluators = [ cls.build_evaluator( cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") ) for name in cfg.DATASETS.TEST ] res = cls.test(cfg, model, evaluators) res = OrderedDict({k + "_TTA": v for k, v in res.items()}) return res @classmethod def build_optimizer(cls, cfg, model): params = get_default_optimizer_params( model, base_lr=cfg.SOLVER.BASE_LR, weight_decay=cfg.SOLVER.WEIGHT_DECAY, weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM, bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR, weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS, ) def maybe_add_full_model_gradient_clipping(optim): # optim: the optimizer class # detectron2 doesn't have full model gradient clipping now clip_norm_val = cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE enable = ( cfg.SOLVER.CLIP_GRADIENTS.ENABLED and cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model" and clip_norm_val > 0.0 ) class FullModelGradientClippingOptimizer(optim): def step(self, closure=None): all_params = itertools.chain(*[x["params"] for x in self.param_groups]) torch.nn.utils.clip_grad_norm_(all_params, clip_norm_val) super().step(closure=closure) return FullModelGradientClippingOptimizer if enable else optim optimizer_type = cfg.SOLVER.OPTIMIZER if optimizer_type == "SGD": optimizer = maybe_add_gradient_clipping(torch.optim.SGD)( params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV, weight_decay=cfg.SOLVER.WEIGHT_DECAY, ) elif optimizer_type == "AdamW": optimizer = maybe_add_full_model_gradient_clipping(torch.optim.AdamW)( params, cfg.SOLVER.BASE_LR, betas=(0.9, 0.999), weight_decay=cfg.SOLVER.WEIGHT_DECAY, ) else: raise NotImplementedError(f"no optimizer type {optimizer_type}") return optimizer def setup(args): """ Create configs and perform basic setups. 
""" cfg = get_cfg() add_swint_config(cfg) cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() default_setup(cfg, args) return cfg def main(args): cfg = setup(args) if args.eval_only: model = Trainer.build_model(cfg) DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( cfg.MODEL.WEIGHTS, resume=args.resume ) res = Trainer.test(cfg, model) if cfg.TEST.AUG.ENABLED: res.update(Trainer.test_with_TTA(cfg, model)) if comm.is_main_process(): verify_results(cfg, res) return res """ If you'd like to do anything fancier than the standard training logic, consider writing your own training loop (see plain_train_net.py) or subclassing the trainer. """ trainer = Trainer(cfg) trainer.resume_or_load(resume=args.resume) #trainer.resume_or_load(resume=True) if cfg.TEST.AUG.ENABLED: trainer.register_hooks( [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] ) return trainer.train() if __name__ == "__main__": args = default_argument_parser().parse_args() print("Command Line Args:", args) launch( main, args.num_gpus, num_machines=args.num_machines, machine_rank=args.machine_rank, dist_url=args.dist_url, args=(args,), ) ``` 2. What exact command you run: python3 train_net_swint.py --num-gpus 4 --config-file ~~ MODEL.WEIGTHS ~~ 3. __Full logs__ or other relevant observations: ``` [08/28 17:40:18 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(1800,), max_size=1800, sample_style='choice'), RandomFlip(prob=0.5), RandomBrightness(intensity_min=0.9, intensity_max=1.1), RandomCrop(crop_type='absolute', crop_size=(640, 640))] WARNING [08/28 17:40:18 d2.data.datasets.coco]: Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. 
[08/28 17:40:18 d2.data.datasets.coco]: Loaded 487 images in COCO format from /home/sangjoon/coco-annotator/mosquito_train_white_1_added_images.json [08/28 17:40:18 d2.data.build]: Removed 0 images with no usable annotations. 487 images left. [08/28 17:40:18 d2.data.build]: Distribution of instances among all 6 categories: | category | #instances | category | #instances | category | #instances | |:----------:|:-------------|:----------:|:-------------|:----------:|:-------------| | cc | 728 | pp | 860 | aeaea | 274 | | toto | 181 | gogo | 458 | ururur | 166 | | | | | | | | | total | 2667 | | | | | [08/28 17:40:18 d2.data.build]: Using training sampler TrainingSampler [08/28 17:40:18 d2.data.common]: Serializing 487 elements to byte tensors and concatenating them all ... [08/28 17:40:18 d2.data.common]: Serialized dataset takes 0.41 MiB ``` ## Expected behavior: If there are no obvious crash in "full logs" provided above, please tell us the expected behavior. Total 2667 data is mine. I want to be increased using augmentation not replaced. How do I do?? If you expect a model to converge / work better, we do not help with such issues, unless a model fails to reproduce the results in detectron2 model zoo, or proves existence of bugs. ## Environment: Paste the output of the following command: ``` wget -nc -nv https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py ``` If your issue looks like an installation issue / environment issue, please first check common issues in https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues
ppwwyyxx commented 3 years ago

This is expected because the table prints the number of images in the source dataset before augmentation.

leesangjoon1 commented 3 years ago

> This is expected because the table prints the number of images in the source dataset before augmentation.

But the training time has also decreased. I think that if the amount of data had increased, the training time would have increased as well. How can I check whether the number of training images has increased?

INF800 commented 3 years ago

> > This is expected because the table prints the number of images in the source dataset before augmentation.

> but the training time is also decreased. I think if the number of data are increased, training time also would be increased. How can I check either the number of training image is increased?

Is it because of `T.RandomCrop('absolute', (640, 640))`?

leesangjoon1 commented 3 years ago

> > > This is expected because the table prints the number of images in the source dataset before augmentation.

> > but the training time is also decreased. I think if the number of data are increased, training time also would be increased. How can I check either the number of training image is increased?

> Is it because of T.RandomCrop('absolute', (640, 640))

Can I ask why that would be the problem?