Xie-Muxi-BK opened 1 year ago
Hello, have you solved this problem? I am looking into the same issue, but I am new to this. Is there any real difference between epoch-based and iteration-based training, or is it just a matter of which style one is used to?
You may need to modify the data configuration. Use 'DefaultSampler' instead of 'InfiniteSampler'.
```python
train_dataloader = dict(
    batch_size=2,
    num_workers=4,
    persistent_workers=True,
    # sampler=dict(type='InfiniteSampler', shuffle=True),
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/training', seg_map_path='annotations/training'),
        pipeline=train_pipeline))
```
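Changing the sampler alone may not be enough to get epoch-based behaviour in MMSegmentation 1.x, since the training loop itself is selected via `train_cfg`. A minimal sketch of that part, where the `max_epochs` and `val_interval` values are illustrative placeholders rather than anything confirmed in this thread:

```python
# Sketch: switch the runner loop from iteration-based to epoch-based.
# max_epochs / val_interval are assumed values, adjust to your schedule.
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=400, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
```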
I'm not sure whether this approach has any flaws. You can take a look at my settings; they run, but I haven't studied the details carefully:

```python
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=3,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320)))
dataset_type = 'PascalContextDataset'
# Raw strings keep Windows backslashes (e.g. '\t' in 'train.txt') from being
# treated as escape sequences.
data_root = r'D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (480, 480)
crop_size = (480, 480)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)),
    dict(type='RandomCrop', crop_size=(480, 480), cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.0),
    dict(type='PhotoMetricDistortion'),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size=(480, 480), pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(480, 480),
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)),
            dict(type='RandomFlip', prob=0.0),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=3,
    workers_per_gpu=1,
    train=dict(
        type='PascalContextDataset',
        data_root=r'D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007',
        img_dir='JPEGImages',
        ann_dir='SegmentationClassPNG',
        split=r'D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007\ImageSets\Segmentation\train.txt',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)),
            dict(type='RandomCrop', crop_size=(480, 480), cat_max_ratio=0.75),
            dict(type='RandomFlip', prob=0.0),
            dict(type='PhotoMetricDistortion'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size=(480, 480), pad_val=0, seg_pad_val=255),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type='PascalContextDataset',
        data_root=r'D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007',
        img_dir='JPEGImages',
        ann_dir='SegmentationClassPNG',
        split=r'D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007\ImageSets\Segmentation\val.txt',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(480, 480),
                flip=False,
                transforms=[
                    dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)),
                    dict(type='RandomFlip', prob=0.0),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='PascalContextDataset',
        data_root=r'D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007',
        img_dir='JPEGImages',
        ann_dir='SegmentationClassPNG',
        split=r'D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007\ImageSets\Segmentation\test.txt',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(480, 480),
                flip=False,
                transforms=[
                    dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)),
                    dict(type='RandomFlip', prob=0.0),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
log_config = dict(
    interval=10, hooks=[dict(type='TextLoggerHook', by_epoch=True)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = r'D:\MMlab\mmsegmentation-0.20.2\tools\work_dirs\pspnet_r50-d8_480x480_40k_pascal_context\latest.pth'
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0001, by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=400)
checkpoint_config = dict(interval=10, by_epoch=True)
evaluation = dict(interval=1, metric='mIoU', pre_eval=True)
work_dir = './work_dirs/pspnet_r50-d8text'
test_evaluator = dict(
    type='IoUMetric', iou_metrics=['mIoU'], output_dir='work_dirs/format_results')
gpu_ids = range(0, 1)
```
Hello, have you solved it? Could you share how you got it working? I also read the mmengine documentation and found that it doesn't quite carry over to mmsegmentation 1.x.
For example:
```python
optim_wrapper = dict(
    _delete_=True,
    type='AmpOptimWrapper',
    optimizer=dict(
        type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05),
    paramwise_cfg={
        'decay_rate': 0.9,
        'decay_type': 'stage_wise',
        'num_layers': 6
    },
    constructor='LearningRateDecayOptimizerConstructor',
    loss_scale='dynamic')
```
```python
param_scheduler = [
    dict(
        type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type='PolyLR',
        power=1.0,
        begin=1500,
        end=160000,
        eta_min=0.0,
        by_epoch=False,
    )
]
```
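Note that both schedulers above count in iterations (`by_epoch=False`, `end=160000`). For an epoch-based run the boundaries would need to be expressed in epochs instead; a hedged sketch, assuming a 400-epoch schedule (an illustrative value, not taken from this thread) and using `convert_to_iter_based` so the learning rate is still updated every iteration:

```python
# Sketch only: epoch-based counterpart of the scheduler above.
param_scheduler = [
    # Warmup still counted in iterations.
    dict(type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500),
    dict(
        type='PolyLR',
        power=1.0,
        eta_min=0.0,
        begin=0,
        end=400,                     # assumed max_epochs
        by_epoch=True,
        convert_to_iter_based=True)  # update per iteration despite epoch units
]
```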
By default, models are trained on 8 GPUs with 2 images per GPU:
```python
train_dataloader = dict(batch_size=2)
val_dataloader = dict(batch_size=1)
test_dataloader = val_dataloader
```
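When switching to epoch-based training it may also help to keep the logging and checkpointing hooks in epoch mode. A minimal sketch, assuming mmengine's default hook keys (`logger`, `checkpoint`); the interval values are placeholders:

```python
# Sketch: hooks configured to report metrics and save checkpoints per epoch.
default_hooks = dict(
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=True),
    checkpoint=dict(type='CheckpointHook', by_epoch=True, interval=1))
```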
I have tried many things, but none of them worked properly. Training always stays in the first epoch and never stops on its own.
Here is my fully modified configuration file: