Hi~ Thanks for your interest in our work. Could you provide your log and config files for better analysis of the problem?
Thank you for your response! First of all, the config files are identical to the files provided in your GitHub repository, except for the changed paths. Here are the config files we used.

* CMID-ResNet

```python
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        dict(type='TensorboardLoggerHook')
    ])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(type='Fp16OptimizerHook', distributed=False)
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=100,
    min_lr=0.000001,
    by_epoch=False)
runner = dict(type='EpochBasedRunner', max_epochs=50)
checkpoint_config = dict(by_epoch=True, interval=1)
evaluation = dict(
    interval=5, metric=['mIoU', 'mFscore'], pre_eval=True, by_epoch=True)
norm_cfg = dict(type='BN', requires_grad=True)
checkpoint_path = "./pretrained/CMID-ResNet50-millionAID.pth"
model = dict(
    type='EncoderDecoder',
    pretrained=checkpoint_path,
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='UPerHead',
        in_channels=[256, 512, 1024, 2048],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        ignore_index=255,
        dropout_ratio=0.1,
        num_classes=6,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        ignore_index=255,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=6,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = "PotsdamDataset"
data_root = 'data/potsdam'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='LoadAnnotationsReduceIgnoreIndex',
        reduce_zero_label=True,
        ignore_index=6),
    dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 512),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/train',
        ann_dir='ann_dir/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='img_dir/val',
        ann_dir='ann_dir/val',
        pipeline=test_pipeline))
```
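A note on the `LoadAnnotationsReduceIgnoreIndex` step in the `train_pipeline` above: that transform is custom to the CMID repo, but with `reduce_zero_label=True` it presumably builds on mmseg's standard label-shifting semantics (an assumption worth checking against the repo's transform). A minimal sketch of that standard behaviour:

```python
import numpy as np

# Sketch of mmseg's standard reduce_zero_label semantics (assumed here to
# underlie the custom LoadAnnotationsReduceIgnoreIndex transform): label 0
# is not a real class, so it is shifted out and marked as ignored (255).
def reduce_zero_label(gt_seg: np.ndarray) -> np.ndarray:
    gt = gt_seg.copy()
    gt[gt == 0] = 255    # original label 0 is treated as "no class"
    gt = gt - 1          # shift the remaining labels down by one
    gt[gt == 254] = 255  # re-mark the ignored pixels after the shift
    return gt

print(reduce_zero_label(np.array([0, 1, 2, 6], dtype=np.uint8)))
# -> [255   0   1   5]
```

This is why the decode and auxiliary heads can use `ignore_index=255` even though the raw Potsdam masks store labels starting at 0.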
* CMID-Swin
```python
# model settings
# checkpoint_file = "./pretrained/CMID_Swin-B_bk_200ep.pth"
checkpoint_file = "./pretrained/CMID-Swin-B-millionAID.pth"
norm_cfg = dict(type='SyncBN', requires_grad=True)
backbone_norm_cfg = dict(type='LN', requires_grad=True)
model = dict(
type='EncoderDecoder',
pretrained=None,
backbone=dict(
type='SwinTransformer',
pretrain_img_size=224,
embed_dims=128,
patch_size=4,
window_size=7,
mlp_ratio=4,
depths=[2, 2, 18, 2],
num_heads=[4, 8, 16, 32],
strides=(4, 2, 2, 2),
out_indices=(0, 1, 2, 3),
qkv_bias=True,
qk_scale=None,
patch_norm=True,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.3,
use_abs_pos_embed=False,
act_cfg=dict(type='GELU'),
init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file),
norm_cfg=backbone_norm_cfg),
decode_head=dict(
type='UPerHead',
in_channels=[128, 256, 512, 1024],
in_index=[0, 1, 2, 3],
pool_scales=(1, 2, 3, 6),
channels=512,
dropout_ratio=0.1,
num_classes=6,
ignore_index=255,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=512,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=6,
ignore_index=255,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook', by_epoch=False),
dict(type='TensorboardLoggerHook')
])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
dataset_type = 'PotsdamDataset'
data_root = 'data/potsdam'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadAnnotationsReduceIgnoreIndex',
reduce_zero_label=True,
ignore_index=6),
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(512, 512),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=512),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=None,
img_ratios=[1.0],
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
img_dir='img_dir/train',
ann_dir='ann_dir/train',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
data_root=data_root,
img_dir='img_dir/val',
ann_dir='ann_dir/val',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
data_root=data_root,
img_dir='img_dir/val',
ann_dir='ann_dir/val',
pipeline=test_pipeline))
optimizer_config = None
optimizer = dict(
type='AdamW',
lr=0.00006,
betas=(0.9, 0.999),
weight_decay=0.01,
paramwise_cfg=dict(
custom_keys={
'absolute_pos_embed': dict(decay_mult=0.),
'relative_position_bias_table': dict(decay_mult=0.),
'norm': dict(decay_mult=0.)
}))
lr_config = dict(
policy='CosineAnnealing',
warmup='linear',
warmup_iters=100,
min_lr=0.000001,
by_epoch=False)
checkpoint_config = dict(by_epoch=True, interval=1)
evaluation = dict(
interval=5, metric=['mIoU', 'mFscore'], pre_eval=True, by_epoch=True)
runner = dict(type='EpochBasedRunner', max_epochs=50)
gpu_ids = [0]
auto_resume = False
```
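For anyone reproducing this, a minimal sketch of how such a config file is parsed and the segmentor built with the mmcv 1.x / mmseg 0.x APIs used in this setup (the config path is illustrative):

```python
from mmcv import Config
from mmseg.models import build_segmentor

# Parse the Python config shown above (path is illustrative).
cfg = Config.fromfile('./configs/cmid_swin.py')

# Build the EncoderDecoder; train_cfg/test_cfg live inside cfg.model here.
model = build_segmentor(cfg.model)

# The backbone's init_cfg points at the CMID checkpoint, so the pretrained
# weights are pulled in when init_weights() runs.
model.init_weights()
```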
And here are the log JSON files:
* CMID-ResNet

```json
{
"config": "./configs/cmid_swin.py",
"metric": {
"aAcc": 0.8137000000000001,
"mIoU": 0.6962,
"mAcc": 0.8009999999999999,
"IoU.impervious_surface": 0.747699966430664,
"IoU.building": 0.6880999755859375,
"IoU.low_vegetation": 0.6165000152587891,
"IoU.tree": 0.6458000183105469,
"IoU.car": 0.7827999877929688,
"Acc.impervious_surface": 0.8461000061035157,
"Acc.building": 0.9302999877929687,
"Acc.low_vegetation": 0.725,
"Acc.tree": 0.6923000335693359,
"Acc.car": 0.8113999938964844
}
}
```
* CMID-Swin

```json
{
"config": "./configs/cmid_resnet.py",
"metric": {
"aAcc": 0.8151999999999999,
"mIoU": 0.7041,
"mAcc": 0.8096,
"IoU.impervious_surface": 0.7501999664306641,
"IoU.building": 0.6973999786376953,
"IoU.low_vegetation": 0.6097000122070313,
"IoU.tree": 0.6477999877929688,
"IoU.car": 0.8154000091552734,
"Acc.impervious_surface": 0.8262000274658203,
"Acc.building": 0.9519999694824218,
"Acc.low_vegetation": 0.729800033569336,
"Acc.tree": 0.6966000366210937,
"Acc.car": 0.8431999969482422
}
}
```
Hi~ @sskim0126 Try the following to reproduce similar results: update your mmseg according to the same file structure as the newly updated folder SemanticSegmentation/mmseg/core/optimizers.
For your reference, the following is my evaluation result after training for 50 epochs (you can achieve the best result by adding `--aug-test` to the mmseg `test.py` script; the result below was produced without `--aug-test`, since I lost the augmented results :( ):
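As context for that flag: in mmseg 0.x, `--aug-test` in `tools/test.py` switches the test pipeline to multi-scale + flip test-time augmentation; to the best of my knowledge it is roughly equivalent to this config override:

```python
from mmcv import Config

cfg = Config.fromfile('./configs/cmid_swin.py')  # illustrative path

# Approximate effect of passing --aug-test to mmseg 0.x tools/test.py:
# multiple scale ratios plus horizontal flip on the MultiScaleFlipAug step.
cfg.data.test.pipeline[1].img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
cfg.data.test.pipeline[1].flip = True
```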
Finally, I have updated the instructions for semantic segmentation tasks.
Thanks!
Closed due to a long period of inactivity; feel free to reopen if there is any problem.
Hello, I would like to run semantic segmentation on the Potsdam dataset using your approach. I have followed all the instructions you provided, but when I ran the tests, the performance was slightly lower than what is reported.
The performance metrics I obtained are as follows:

* Metrics from GitHub
* Metrics from my own experiments
I would like to know if I made any mistakes that could have led to the lower performance.
For reference, here is the environment in which I conducted the experiments:
Model weights:

* CMID - ResNet-50: Link to Download
* CMID - Swin-B: Link to Download
Conda environment:

Name Version Build Channel
_libgcc_mutex 0.1 conda_forge conda-forge
_openmp_mutex 4.5 2_gnu conda-forge
absl-py 2.0.0 pypi_0 pypi
addict 2.4.0 pypi_0 pypi
aliyun-python-sdk-core 2.14.0 pypi_0 pypi
aliyun-python-sdk-kms 2.16.2 pypi_0 pypi
appdirs 1.4.4 pypi_0 pypi
attrs 23.1.0 pypi_0 pypi
bzip2 1.0.8 h7f98852_4 conda-forge
ca-certificates 2023.08.22 h06a4308_0 defaults
cachetools 5.3.2 pypi_0 pypi
certifi 2023.7.22 pypi_0 pypi
cffi 1.16.0 pypi_0 pypi
charset-normalizer 3.3.1 pypi_0 pypi
cityscapesscripts 2.2.2 pypi_0 pypi
click 8.1.7 pypi_0 pypi
codecov 2.1.13 pypi_0 pypi
colorama 0.4.6 pypi_0 pypi
coloredlogs 15.0.1 pypi_0 pypi
contourpy 1.1.1 pypi_0 pypi
coverage 7.3.2 pypi_0 pypi
crcmod 1.7 pypi_0 pypi
cryptography 41.0.5 pypi_0 pypi
cycler 0.12.1 pypi_0 pypi
exceptiongroup 1.1.3 pypi_0 pypi
flake8 6.1.0 pypi_0 pypi
fonttools 4.43.1 pypi_0 pypi
google-auth 2.23.3 pypi_0 pypi
google-auth-oauthlib 1.0.0 pypi_0 pypi
grpcio 1.59.0 pypi_0 pypi
humanfriendly 10.0 pypi_0 pypi
idna 3.4 pypi_0 pypi
importlib-metadata 6.8.0 pypi_0 pypi
importlib-resources 6.1.0 pypi_0 pypi
iniconfig 2.0.0 pypi_0 pypi
interrogate 1.5.0 pypi_0 pypi
jmespath 0.10.0 pypi_0 pypi
kiwisolver 1.4.5 pypi_0 pypi
ld_impl_linux-64 2.40 h41732ed_0 conda-forge
libffi 3.4.4 h6a678d5_0 defaults
libgcc-ng 13.2.0 h807b86a_2 conda-forge
libgomp 13.2.0 h807b86a_2 conda-forge
libnsl 2.0.1 hd590300_0 conda-forge
libsqlite 3.43.2 h2797004_0 conda-forge
libstdcxx-ng 13.2.0 h7e041cc_2 conda-forge
libuuid 2.38.1 h0b41bf4_0 conda-forge
libzlib 1.2.13 hd590300_5 conda-forge
markdown 3.5 pypi_0 pypi
markdown-it-py 3.0.0 pypi_0 pypi
markupsafe 2.1.3 pypi_0 pypi
matplotlib 3.7.3 pypi_0 pypi
mccabe 0.7.0 pypi_0 pypi
mdurl 0.1.2 pypi_0 pypi
mmcls 0.25.0 pypi_0 pypi
mmcv-full 1.6.1 pypi_0 pypi
mmengine 0.9.0 pypi_0 pypi
mmsegmentation 0.27.0 dev_0
model-index 0.1.11 pypi_0 pypi
ncurses 6.4 hcb278e6_0 conda-forge
numpy 1.24.4 pypi_0 pypi
oauthlib 3.2.2 pypi_0 pypi
opencv-python 4.8.1.78 pypi_0 pypi
opendatalab 0.0.10 pypi_0 pypi
openmim 0.3.9 pypi_0 pypi
openssl 3.1.4 hd590300_0 conda-forge
openxlab 0.0.28 pypi_0 pypi
ordered-set 4.1.0 pypi_0 pypi
oss2 2.17.0 pypi_0 pypi
packaging 23.2 pypi_0 pypi
pandas 2.0.3 pypi_0 pypi
pillow 10.1.0 pypi_0 pypi
pip 23.3.1 pyhd8ed1ab_0 conda-forge
platformdirs 3.11.0 pypi_0 pypi
pluggy 1.3.0 pypi_0 pypi
prettytable 3.9.0 pypi_0 pypi
protobuf 4.24.4 pypi_0 pypi
py 1.11.0 pypi_0 pypi
pyasn1 0.5.0 pypi_0 pypi
pyasn1-modules 0.3.0 pypi_0 pypi
pycodestyle 2.11.1 pypi_0 pypi
pycparser 2.21 pypi_0 pypi
pycryptodome 3.19.0 pypi_0 pypi
pyflakes 3.1.0 pypi_0 pypi
pygments 2.16.1 pypi_0 pypi
pyparsing 3.1.1 pypi_0 pypi
pyquaternion 0.9.9 pypi_0 pypi
pytest 7.4.3 pypi_0 pypi
python 3.8.18 hd12c33a_0_cpython conda-forge
python-dateutil 2.8.2 pypi_0 pypi
pytz 2023.3.post1 pypi_0 pypi
pyyaml 6.0.1 pypi_0 pypi
readline 8.2 h8228510_1 conda-forge
requests 2.28.2 pypi_0 pypi
requests-oauthlib 1.3.1 pypi_0 pypi
rich 13.4.2 pypi_0 pypi
rsa 4.9 pypi_0 pypi
setuptools 59.5.0 pypi_0 pypi
six 1.16.0 pypi_0 pypi
tabulate 0.9.0 pypi_0 pypi
tensorboard 2.14.0 pypi_0 pypi
tensorboard-data-server 0.7.2 pypi_0 pypi
termcolor 2.3.0 pypi_0 pypi
tk 8.6.13 h2797004_0 conda-forge
toml 0.10.2 pypi_0 pypi
tomli 2.0.1 pypi_0 pypi
torch 1.10.1+cu111 pypi_0 pypi
torchaudio 0.10.1+cu111 pypi_0 pypi
torchvision 0.11.2+cu111 pypi_0 pypi
tqdm 4.65.2 pypi_0 pypi
typing 3.7.4.3 pypi_0 pypi
typing-extensions 4.8.0 pypi_0 pypi
tzdata 2023.3 pypi_0 pypi
urllib3 1.26.18 pypi_0 pypi
wcwidth 0.2.8 pypi_0 pypi
werkzeug 3.0.1 pypi_0 pypi
wheel 0.41.2 pyhd8ed1ab_0 conda-forge
xdoctest 1.1.1 pypi_0 pypi
xz 5.4.2 h5eee18b_0 defaults
yapf 0.40.1 pypi_0 pypi
zipp 3.17.0 pypi_0 pypi