Open WHUThcz opened 2 months ago
The dataset class and the config I am using are shown below:
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

import mmengine.fileio as fileio

from mmseg.registry import DATASETS
from .basesegdataset import BaseSegDataset


@DATASETS.register_module()
class PascalVOCDataset(BaseSegDataset):
    """Segmentation dataset for Pascal VOC.

    ``METAINFO`` lists 21 class names, the first of which (index 0) is
    ``'background'``, together with one RGB palette entry per class.

    Args:
        ann_file (str): Split txt file for Pascal VOC.
        img_suffix (str): Image file suffix, forwarded to
            ``BaseSegDataset``. Defaults to ``'.jpg'``.
        seg_map_suffix (str): Segmentation map file suffix, forwarded to
            ``BaseSegDataset``. Defaults to ``'.png'``.
        **kwargs: Remaining keyword arguments, forwarded unchanged to
            ``BaseSegDataset``.
    """

    METAINFO = {
        # Index 0 is the background class.
        'classes': (
            'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
            'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
            'train', 'tvmonitor',
        ),
        # One RGB triple per entry of ``classes``, in the same order.
        'palette': [
            [0, 0, 0],
            [128, 0, 0],
            [0, 128, 0],
            [128, 128, 0],
            [0, 0, 128],
            [128, 0, 128],
            [0, 128, 128],
            [128, 128, 128],
            [64, 0, 0],
            [192, 0, 0],
            [64, 128, 0],
            [192, 128, 0],
            [64, 0, 128],
            [192, 0, 128],
            [64, 128, 128],
            [192, 128, 128],
            [0, 64, 0],
            [128, 64, 0],
            [0, 192, 0],
            [128, 192, 0],
            [0, 64, 128],
        ],
    }

    def __init__(self,
                 ann_file,
                 img_suffix='.jpg',
                 seg_map_suffix='.png',
                 **kwargs) -> None:
        super().__init__(
            ann_file=ann_file,
            img_suffix=img_suffix,
            seg_map_suffix=seg_map_suffix,
            **kwargs)
        # Sanity checks after the base class has resolved the paths: the
        # image directory must exist (checked through the file backend) and
        # the split file must be a regular local file.
        assert fileio.exists(self.data_prefix['img_path'], self.backend_args)
        assert osp.isfile(self.ann_file)
# MMSegmentation config: FPN head on an `fpvt_tiny` backbone, trained on
# Pascal VOC 2012 with an iteration-based schedule.
#
# Values that were repeated as literals now reference the top-level
# variable that already held them (pipelines, data preprocessor, norm_cfg,
# dataset root/type, crop size, TTA ratios), so they cannot drift apart.

# ---------------------------------------------------------------- dataset
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
crop_size = (512, 512)
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(
        type='RandomResize',
        scale=(2048, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs'),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # Annotations are loaded after the resize, so the ground-truth map is
    # not resized.
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs'),
]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            # One resize per ratio in `img_ratios`.
            [
                dict(type='Resize', scale_factor=ratio, keep_ratio=True)
                for ratio in img_ratios
            ],
            # Un-flipped and horizontally flipped variants.
            [
                dict(type='RandomFlip', prob=0.0, direction='horizontal'),
                dict(type='RandomFlip', prob=1.0, direction='horizontal'),
            ],
            [dict(type='LoadAnnotations')],
            [dict(type='PackSegInputs')],
        ]),
]

# FIX: `reduce_zero_label` must be False for this dataset. The class list of
# PascalVOCDataset starts with 'background' (label 0) and the decode head
# predicts 21 classes. With `reduce_zero_label=True` (the ADE20K
# convention, where label 0 means "unlabelled") label 0 is mapped to the
# ignore index and every other label is shifted down by one, so targets no
# longer line up with the 21 class names.
train_dataloader = dict(
    batch_size=8,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClass'),
        ann_file='ImageSets/Segmentation/train.txt',
        reduce_zero_label=False,
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClass'),
        ann_file='ImageSets/Segmentation/val.txt',
        reduce_zero_label=False,
        pipeline=test_pipeline))
# Testing uses the same split and pipeline as validation.
test_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages', seg_map_path='SegmentationClass'),
        ann_file='ImageSets/Segmentation/val.txt',
        reduce_zero_label=False,
        pipeline=test_pipeline))

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])

# ------------------------------------------------------------------ model
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=crop_size)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    # NOTE(review): `pretrained=True` is kept as given; confirm that the
    # custom `fpvt_tiny` backbone accepts a boolean here.
    pretrained=True,
    backbone=dict(type='fpvt_tiny'),
    neck=dict(
        type='FPN',
        in_channels=[32, 64, 160, 256],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        # 21 = 'background' + 20 object classes.
        num_classes=21,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
tta_model = dict(type='SegTTAModel')

# ------------------------------------------------- optimizer and schedule
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='AdamW', lr=4e-05, betas=(0.9, 0.999), weight_decay=0.01),
    # optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005),
    paramwise_cfg=dict(
        bypass_duplicate=True,
        custom_keys=dict(
            head=dict(lr_mult=10.0),
            norm=dict(decay_mult=0.0),
            pos_block=dict(decay_mult=0.0))))
param_scheduler = [
    # Linear warm-up for the first 1500 iterations ...
    dict(
        type='LinearLR',
        start_factor=1e-06,
        by_epoch=False,
        begin=0,
        end=1500),
    # ... followed by polynomial decay until `train_cfg.max_iters`.
    dict(
        type='PolyLR',
        eta_min=0.0,
        power=0.9,
        by_epoch=False,
        begin=1500,
        end=320000),
]
train_cfg = dict(type='IterBasedTrainLoop', max_iters=320000, val_interval=5000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# ---------------------------------------------------------------- runtime
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=5000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(by_epoch=False)
log_level = 'INFO'
launcher = 'pytorch'
load_from = 'fpvt_pretrained.pth'
resume = False
# NOTE(review): the directory name says "40k" but `max_iters` is 320000;
# the path is kept unchanged so existing checkpoints and logs are still found.
work_dir = './work_dirs/fpn_fpvt_t_pascal_voc12_40k'
With the same setup, training and evaluation behave normally on the ADE20K dataset.
the config and dataset