open-mmlab / mmsegmentation

OpenMMLab Semantic Segmentation Toolbox and Benchmark.
https://mmsegmentation.readthedocs.io/en/main/
Apache License 2.0
8.23k stars 2.61k forks source link

How to change the training policy from Iter to Epoch based on the existing training profile ?如何根据已有的训练配置文件的训练政策从Iter改为Epoch? #2908

Open Xie-Muxi-BK opened 1 year ago

Xie-Muxi-BK commented 1 year ago

For example:

configs/convnext/convnext-tiny_upernet_8xb2-amp-160k_ade20k-512x512.py


_base_ = [
'../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
]
crop_size = (512, 512)
data_preprocessor = dict(size=crop_size)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth'  # noqa
model = dict(
data_preprocessor=data_preprocessor,
backbone=dict(
type='mmcls.ConvNeXt',
arch='tiny',
out_indices=[0, 1, 2, 3],
drop_path_rate=0.4,
layer_scale_init_value=1.0,
gap_before_final_norm=False,
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file,
prefix='backbone.')),
decode_head=dict(
in_channels=[96, 192, 384, 768],
num_classes=150,
),
auxiliary_head=dict(in_channels=384, num_classes=150),
test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)),
)

optim_wrapper = dict( delete=True, type='AmpOptimWrapper', optimizer=dict( type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05), paramwise_cfg={ 'decay_rate': 0.9, 'decay_type': 'stage_wise', 'num_layers': 6 }, constructor='LearningRateDecayOptimizerConstructor', loss_scale='dynamic')

param_scheduler = [ dict( type='LinearLR', start_factor=1e-6, by_epoch=False, begin=0, end=1500), dict( type='PolyLR', power=1.0, begin=1500, end=160000, eta_min=0.0, by_epoch=False, ) ]

By default, models are trained on 8 GPUs with 2 images per GPU

train_dataloader = dict(batch_size=2) val_dataloader = dict(batch_size=1) test_dataloader = val_dataloader


在mmseg0.X的修改runner的参数,但是现在没有runner了需要怎么改?
Modify runner parameters in mmseg0.X, but now there is no runner, what should be changed?

try:
```python
# training schedule for 160k
train_cfg = dict(
    type='EpochBasedTrainLoop', max_epochs=3, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

I have tried many things, but none of them worked properly Training always stays in the first epoch and does not even stop automatically

image

This is my completely modified configuration file:


norm_cfg = dict(type='SyncBN', requires_grad=True)
custom_imports = dict(imports='mmcls.models', allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth'  # noqa
crop_size = (512, 512)

data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=crop_size
)

model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type='mmcls.ConvNeXt',
        arch='base',
        out_indices=[0, 1, 2, 3],
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        gap_before_final_norm=False,
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')),
    decode_head=dict(
        type='UPerHead',
        in_channels=[128, 256, 512, 1024],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=150,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=512,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=150,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341))
)

# dataset settings
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=(2048, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=r, keep_ratio=True)
                for r in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=0., direction='horizontal'),
                dict(type='RandomFlip', prob=1., direction='horizontal')
            ], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
        ])
]
train_dataloader = dict(
    batch_size=2,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/training', seg_map_path='annotations/training'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation'),
        pipeline=test_pipeline))
test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

# runtime settings
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(by_epoch=True)
log_level = 'INFO'
load_from = None
resume = False

tta_model = dict(type='SegTTAModel')

# optimizer
optim_wrapper = dict(
    # _delete_=True,
    type='AmpOptimWrapper',
    optimizer=dict(
        type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05),
    paramwise_cfg={
        'decay_rate': 0.9,
        'decay_type': 'stage_wise',
        'num_layers': 12
    },
    constructor='LearningRateDecayOptimizerConstructor',
    loss_scale='dynamic',
    clip_grad=None
)

# learning policy
param_scheduler = [
    dict(
        type='LinearLR', 
        start_factor=1e-6, 
        by_epoch=False, 
        begin=0, 
        end=1500
    ),
    dict(
        type='PolyLR',
        power=1.0,
        begin=1500,
        end=160000,
        eta_min=0.0,
        by_epoch=False,
    )
]

# training schedule for 160k
train_cfg = dict(
    type='EpochBasedTrainLoop', max_epochs=3, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# default_hooks = dict(
#     # timer=dict(type='EpochTimerHook'),
#     # logger=dict(type='LoggerHook', interval=1, log_metric_by_epoch=True),
#     # param_scheduler=dict(type='ParamSchedulerHook'),
#     # checkpoint=dict(type='CheckpointHook', by_epoch=True, interval=1),
#     # sampler_seed=dict(type='DistSamplerSeedHook'),
#     visualization=dict(type='SegVisualizationHook'))
huanruizhang123 commented 1 year ago

Hello, have you solved this problem? I am studying this problem, but I just learned this. I would like to ask if there is any difference between using Epoch and Iter, or just to get used to my own training method

nightrain01 commented 1 year ago

You may need to modify the data configuration. Use 'DefaultSampler' instead of 'InfiniteSampler'.

train_dataloader = dict(
    batch_size=2,
    num_workers=4,
    persistent_workers=True,
    # sampler=dict(type='InfiniteSampler', shuffle=True),
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/training', seg_map_path='annotations/training'),
        pipeline=train_pipeline))
gaohaozhang commented 1 year ago

不知道这么做是不是有瑕疵,你可以看一下我得设置,可以运行但是细节我没有仔细研究: norm_cfg = dict(type='SyncBN', requires_grad=True) model = dict( type='EncoderDecoder', backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), dilations=(1, 1, 2, 4), strides=(1, 2, 1, 1), norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False, style='pytorch', contract_dilation=True), decode_head=dict( type='PSPHead', in_channels=2048, in_index=3, channels=512, pool_scales=(1, 2, 3, 6), dropout_ratio=0.1, num_classes=3, norm_cfg=dict(type='SyncBN', requires_grad=True), align_corners=False, loss_decode=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), train_cfg=dict(), test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) dataset_type = 'PascalContextDataset' data_root = 'D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007' img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) img_scale=(480, 480) crop_size = (480, 480) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations'), dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)), dict(type='RandomCrop', crop_size=(480, 480), cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.0), dict(type='PhotoMetricDistortion'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size=(480, 480), pad_val=0, seg_pad_val=255), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_semantic_seg']) ] test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(480, 480), flip=False, transforms=[ dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)), dict(type='RandomFlip', prob=0.0), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ] )

] data = dict( samples_per_gpu=3, workers_per_gpu=1, train=dict( type='PascalContextDataset', data_root='D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007', img_dir='JPEGImages', ann_dir='SegmentationClassPNG', split='D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007\ImageSets\Segmentation\train.txt', pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations'), dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)), dict(type='RandomCrop', crop_size=(480, 480), cat_max_ratio=0.75), dict(type='RandomFlip', prob=0.0), dict(type='PhotoMetricDistortion'), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='Pad', size=(480, 480), pad_val=0, seg_pad_val=255), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img', 'gt_semantic_seg']) ]), val=dict( type='PascalContextDataset', data_root='D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007', img_dir='JPEGImages', ann_dir='SegmentationClassPNG', split='D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007\ImageSets\Segmentation\val.txt', pipeline=[ dict(type='LoadImageFromFile'), dict(type='MultiScaleFlipAug', img_scale=(480,480), flip=False, transforms=[ dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)), dict(type='RandomFlip',prob=0.0), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ]), test=dict( type='PascalContextDataset', data_root='D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007', img_dir='JPEGImages', ann_dir='SegmentationClassPNG', split='D:\MMlab\mmsegmentation-0.20.2\data\VOCdevkit\VOC2007\ImageSets\Segmentation\test.txt', pipeline=[ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(480,480), flip=False, transforms=[ dict(type='Resize', img_scale=(480, 480), ratio_range=(1.0, 1.0)), dict(type='RandomFlip',prob=0.0), dict( type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ])) log_config = dict( interval=10, hooks=[dict(type='TextLoggerHook', by_epoch=True)]) dist_params = dict(backend='nccl') log_level = 'INFO' load_from = 'D:\MMlab\mmsegmentation-0.20.2\tools\work_dirs\pspnet_r50-d8_480x480_40k_pascal_context\latest.pth' resume_from = None workflow = [('train', 1)] cudnn_benchmark = True optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) optimizer_config = dict() lr_config = dict(policy='poly', power=0.9, min_lr=0.0001, by_epoch=True) runner = dict(type='EpochBasedRunner', max_epochs=400) checkpoint_config = dict(interval=10,by_epoch=True) evaluation = dict(interval=1, metric='mIoU', pre_eval=True) work_dir = './work_dirs\pspnet_r50-d8text' test_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'], output_dir='work_dirs/format_results') gpu_ids = range(0, 1)

yangtaowillv commented 11 months ago

您好,请问您解决了吗?能否分享一下成功的经验?我也是读了mmengine的文档发现并不太适用于mmsegmentation 1.x