open-mmlab / mmsegmentation

OpenMMLab Semantic Segmentation Toolbox and Benchmark.
https://mmsegmentation.readthedocs.io/en/main/
Apache License 2.0
8.32k stars 2.63k forks source link

This error always occurs when I use segformer for semantic segmentation #3727

Open luoyq6 opened 4 months ago

luoyq6 commented 4 months ago

Traceback (most recent call last): File "/data/luoyq/mmsegmentation_103/./tools/train.py", line 104, in main() File "/data/luoyq/mmsegmentation_103/./tools/train.py", line 100, in main runner.train() File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/mmengine/runner/runner.py", line 1777, in train model = self.train_loop.run() # type: ignore File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/mmengine/runner/loops.py", line 287, in run self.run_iter(data_batch) File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/mmengine/runner/loops.py", line 311, in run_iter outputs = self.runner.model.train_step( File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/mmengine/model/wrappers/distributed.py", line 121, in train_step losses = self._run_forward(data, mode='loss') File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/mmengine/model/wrappers/distributed.py", line 161, in _run_forward results = self(data, mode=mode) File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, *kwargs) File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1156, in forward output = self._run_ddp_forward(inputs, kwargs) File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/torch/nn/parallel/distributed.py", line 1110, in _run_ddp_forward return module_to_run(*inputs[0], *kwargs[0]) # type: ignore[index] File "/opt/workspace/.conda/envs/lyq_py3.10/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(args, **kwargs) File "/data/luoyq/mmsegmentation_103/mmseg/models/segmentors/base.py", line 94, in forward return self.loss(inputs, data_samples) File "/data/luoyq/mmsegmentation_103/mmseg/models/segmentors/encoder_decoder.py", line 178, in loss loss_decode = 
self._decode_head_forward_train(x, data_samples) File "/data/luoyq/mmsegmentation_103/mmseg/models/segmentors/encoder_decoder.py", line 139, in _decode_head_forward_train loss_decode = self.decode_head.loss(inputs, data_samples, File "/data/luoyq/mmsegmentation_103/mmseg/models/decode_heads/decode_head.py", line 262, in loss losses = self.loss_by_feat(seg_logits, batch_data_samples) File "/data/luoyq/mmsegmentation_103/mmseg/models/decode_heads/decode_head.py", line 305, in loss_by_feat seg_label = self._stack_batch_gt(batch_data_samples) File "/data/luoyq/mmsegmentation_103/mmseg/models/decode_heads/decode_head.py", line 289, in _stack_batch_gt return torch.stack(gt_semantic_segs, dim=0)

checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth' crop_size = ( 56, 56, ) data_preprocessor = dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, ], pad_val=0, seg_pad_val=255, size=( 56, 56, ), std=[ 58.395, 57.12, 57.375, ], type='SegDataPreProcessor') data_root = '/opt/workspace/luoyq/mmsegmentation_103/data/seban_roi/VOCdevkit/VOC2012' dataset_type = 'PascalVOCDataset' default_hooks = dict( checkpoint=dict( by_epoch=False, interval=200, save_best='mIoU', type='CheckpointHook'), logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), sampler_seed=dict(type='DistSamplerSeedHook'), timer=dict(type='IterTimerHook'), visualization=dict(type='SegVisualizationHook')) default_scope = 'mmseg' env_cfg = dict( cudnn_benchmark=True, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) img_ratios = [ 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, ] launcher = 'pytorch' load_from = None log_level = 'INFO' log_processor = dict(by_epoch=False) model = dict( backbone=dict( attn_drop_rate=0.0, drop_path_rate=0.1, drop_rate=0.0, embed_dims=64, in_channels=3, init_cfg=dict( checkpoint= 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth', type='Pretrained'), mlp_ratio=4, num_heads=[ 1, 2, 5, 8, ], num_layers=[ 3, 8, 27, 3, ], num_stages=4, out_indices=( 0, 1, 2, 3, ), patch_sizes=[ 7, 3, 3, 3, ], qkv_bias=True, sr_ratios=[ 8, 4, 2, 1, ], type='MixVisionTransformer'), data_preprocessor=dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, ], pad_val=0, seg_pad_val=255, size=( 56, 56, ), std=[ 58.395, 57.12, 57.375, ], type='SegDataPreProcessor'), decode_head=dict( align_corners=False, channels=256, dropout_ratio=0.1, in_channels=[ 64, 128, 320, 512, ], in_index=[ 0, 1, 2, 3, ], loss_decode=dict( loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False), 
norm_cfg=dict(requires_grad=True, type='SyncBN'), num_classes=2, type='SegformerHead'), pretrained=None, test_cfg=dict(crop_size=( 56, 56, ), mode='slide', stride=( 32, 32, )), train_cfg=dict(), type='EncoderDecoder') norm_cfg = dict(requires_grad=True, type='SyncBN') optim_wrapper = dict( loss_scale='dynamic', optimizer=dict( betas=( 0.9, 0.999, ), lr=6e-05, type='AdamW', weight_decay=0.01), paramwise_cfg=dict( custom_keys=dict( head=dict(lr_mult=10.0), norm=dict(decay_mult=0.0), pos_block=dict(decay_mult=0.0))), type='AmpOptimWrapper') optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005) param_scheduler = [ dict( begin=0, by_epoch=False, end=1500, start_factor=1e-06, type='LinearLR'), dict( begin=1500, by_epoch=False, end=20000, eta_min=0.0, power=1.0, type='PolyLR'), ] resume = False test_cfg = dict(type='TestLoop') test_dataloader = dict( batch_size=1, dataset=dict( ann_file='ImageSets/Segmentation/val.txt', data_prefix=dict( img_path='JPEGImages', seg_map_path='SegmentationClass'), data_root= '/opt/workspace/luoyq/mmsegmentation_103/data/seban_roi/VOCdevkit/VOC2012', pipeline=[ dict(type='LoadImageFromFile'), dict(keep_ratio=False, scale=( 112, 99, ), type='Resize'), dict(type='LoadAnnotations'), dict(type='PackSegInputs'), ], type='PascalVOCDataset'), num_workers=6, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( iou_metrics=[ 'mIoU', ], type='IoUMetric') test_pipeline = [ dict(type='LoadImageFromFile'), dict(keep_ratio=False, scale=( 112, 99, ), type='Resize'), dict(type='LoadAnnotations'), dict(type='PackSegInputs'), ] train_cfg = dict(max_iters=20000, type='IterBasedTrainLoop', val_interval=200) train_dataloader = dict( batch_size=4, dataset=dict( ann_file='ImageSets/Segmentation/train.txt', data_prefix=dict( img_path='JPEGImages', seg_map_path='SegmentationClass'), data_root= '/opt/workspace/luoyq/mmsegmentation_103/data/seban_roi/VOCdevkit/VOC2012', pipeline=[ 
dict(type='LoadImageFromFile'), dict(type='LoadAnnotations'), dict( keep_ratio=True, ratio_range=( 0.5, 2.0, ), scale=( 109, 112, ), type='RandomResize'), dict(cat_max_ratio=0.75, crop_size=( 56, 56, ), type='RandomCrop'), dict(prob=0.5, type='RandomFlip'), dict(type='PhotoMetricDistortion'), dict(type='PackSegInputs'), ], type='PascalVOCDataset'), num_workers=6, persistent_workers=True, sampler=dict(shuffle=True, type='InfiniteSampler')) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations'), dict( keep_ratio=True, ratio_range=( 0.5, 2.0, ), scale=( 109, 112, ), type='RandomResize'), dict(cat_max_ratio=0.75, crop_size=( 56, 56, ), type='RandomCrop'), dict(prob=0.5, type='RandomFlip'), dict(type='PhotoMetricDistortion'), dict(type='PackSegInputs'), ] tta_model = dict(type='SegTTAModel') tta_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict( transforms=[ [ dict(keep_ratio=True, scale_factor=0.5, type='Resize'), dict(keep_ratio=True, scale_factor=0.75, type='Resize'), dict(keep_ratio=True, scale_factor=1.0, type='Resize'), dict(keep_ratio=True, scale_factor=1.25, type='Resize'), dict(keep_ratio=True, scale_factor=1.5, type='Resize'), dict(keep_ratio=True, scale_factor=1.75, type='Resize'), ], [ dict(direction='horizontal', prob=0.0, type='RandomFlip'), dict(direction='horizontal', prob=1.0, type='RandomFlip'), ], [ dict(type='LoadAnnotations'), ], [ dict(type='PackSegInputs'), ], ], type='TestTimeAug'), ] val_cfg = dict(type='ValLoop') val_dataloader = dict( batch_size=1, dataset=dict( ann_file='ImageSets/Segmentation/val.txt', data_prefix=dict( img_path='JPEGImages', seg_map_path='SegmentationClass'), data_root= '/opt/workspace/luoyq/mmsegmentation_103/data/seban_roi/VOCdevkit/VOC2012', pipeline=[ dict(type='LoadImageFromFile'), dict(keep_ratio=False, scale=( 112, 99, ), type='Resize'), dict(type='LoadAnnotations'), dict(type='PackSegInputs'), ], type='PascalVOCDataset'), num_workers=6, persistent_workers=True, 
sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict( iou_metrics=[ 'mIoU', ], type='IoUMetric') vis_backends = [ dict(type='LocalVisBackend'), ] visualizer = dict( name='visualizer', type='SegLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), ]) work_dir = 'work_dirs/segformer_mit-b4_8xb1-20k_seban-56x56_v1'

luoyq6 commented 4 months ago

RuntimeError: stack expects each tensor to be equal size, but got [1, 56, 56] at entry 0 and [1, 18, 56] at entry 1

chengxuyuansun123 commented 4 months ago

Hi, do you have any solutions now? Thanks.