The official repo for [JSTARS'24] "MTP: Advancing Remote Sensing Foundation Model via Multi-Task Pretraining"
变化检测test精度问题

vaelaogudong commented 3 days ago

作者您好,我在用您的权重做测试时,精度并没有达到您log里的那么高,我已经将LEVIR数据集裁剪成256*256并且删除了完全无变化及 特别小变化的影像对,以下是我的测试log: 2024/09/18 15:22:10 - mmengine - INFO - Config: crop_size = ( 256, 256, ) data_preprocessor = dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, 123.675, 116.28, 103.53, ], pad_val=0, seg_pad_val=255, size_divisor=32, std=[ 58.395, 57.12, 57.375, 58.395, 57.12, 57.375, ], test_cfg=dict(size_divisor=32), type='DualInputSegDataPreProcessor') data_root = '/home/zzjx/dataset/LEVIR-CD/256/levir_format' dataset_type = 'LEVIR_CD_Dataset' default_hooks = dict( checkpoint=dict( by_epoch=True, interval=30, save_best='mIoU', type='CheckpointHook'), logger=dict(interval=50, log_metric_by_epoch=True, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), sampler_seed=dict(type='DistSamplerSeedHook'), timer=dict(type='IterTimerHook'), visualization=dict( draw=True, img_shape=( 256, 256, 3, ), interval=1, type='CDVisualizationHook')) default_scope = 'opencd' env_cfg = dict( cudnn_benchmark=True, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) launcher = 'none' load_from = 'weights/levir-rvsa-l-mae-mtp-epoch_150.pth' log_level = 'INFO' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) model = dict( backbone=dict( attn_drop_rate=0.0, depth=24, drop_path_rate=0.3, drop_rate=0.0, embed_dim=1024, frozen_stages=-1, img_size=256, interval=6, mlp_ratio=4, num_heads=16, out_indices=[ 7, 11, 15, 23, ], patch_size=16, pretrained= '/work/share/achk2o1zg1/diwang22/work_dir/multitask_pretrain/pretrain/avg/with_background/vit_l_rvsa_448_mae_samrs_mtp_three/last_vit_l_rvsa_ss_is_rd_pretrn_model_encoder.pth', qk_scale=None, qkv_bias=True, type='RVSA_MTP', use_abs_pos_emb=True, use_checkpoint=False), data_preprocessor=dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, 123.675, 116.28, 103.53, ], pad_val=0, seg_pad_val=255, size_divisor=32, std=[ 58.395, 57.12, 57.375, 58.395, 57.12, 57.375, ], 
test_cfg=dict(size_divisor=32), type='DualInputSegDataPreProcessor'), decode_head=dict( align_corners=False, attention_type=None, center=False, channels=64, decoder_channels=[ 512, 256, 128, 64, ], dropout_ratio=0.1, encoder_channels=[ 1024, 1024, 1024, 1024, ], ignore_index=255, in_channels=[ 1024, 1024, 1024, 1024, ], in_index=[ 0, 1, 2, 3, ], loss_decode=dict( loss_weight=1.0, type='mmseg.CrossEntropyLoss', use_sigmoid=False), n_blocks=4, norm_cfg=dict(requires_grad=True, type='SyncBN'), num_classes=2, type='UNetHead', use_batchnorm=True), neck=dict( out_indices=( 0, 1, 2, 3, ), policy='abs_diff', type='FeatureFusionNeck'), test_cfg=dict(mode='whole'), train_cfg=dict(), type='SiamEncoderDecoder') norm_cfg = dict(requires_grad=True, type='SyncBN') optim_wrapper = dict( constructor='LayerDecayOptimizerConstructor_ViT', optimizer=dict( betas=( 0.9, 0.999, ), lr=6e-05, type='AdamW', weight_decay=0.05), paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=24)) param_scheduler = [ dict( begin=0, by_epoch=True, convert_to_iter_based=True, end=5, start_factor=1e-06, type='LinearLR'), dict(T_max=145, begin=5, by_epoch=True, end=150, type='CosineAnnealingLR'), ] resume = False test_cfg = dict(type='TestLoop') test_dataloader = dict( batch_size=1, dataset=dict( data_prefix=dict( img_path_from='test/A', img_path_to='test/B', seg_map_path='test/label'), data_root='/home/zzjx/dataset/LEVIR-CD/256/levir_format', pipeline=[ dict(type='MultiImgLoadImageFromFile'), dict(type='MultiImgLoadAnnotations'), dict(type='MultiImgPackSegInputs'), ], type='LEVIR_CD_Dataset'), num_workers=8, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( iou_metrics=[ 'mFscore', 'mIoU', ], type='mmseg.IoUMetric') test_pipeline = [ dict(type='MultiImgLoadImageFromFile'), dict(type='MultiImgLoadAnnotations'), dict(type='MultiImgPackSegInputs'), ] train_cfg = dict(max_epochs=150, type='EpochBasedTrainLoop', val_interval=30) train_dataloader = dict( 
batch_size=4, dataset=dict( data_prefix=dict( img_path_from='train/A', img_path_to='train/B', seg_map_path='train/label'), data_root='/home/zzjx/dataset/LEVIR-CD/256/levir_format', pipeline=[ dict(type='MultiImgLoadImageFromFile'), dict(type='MultiImgLoadAnnotations'), dict( degree=( -20, 20, ), flip_prob=0.5, rotate_prob=0.5, type='MultiImgRandomRotFlip'), dict( cat_max_ratio=0.75, crop_size=( 256, 256, ), type='MultiImgRandomCrop'), dict(prob=0.5, type='MultiImgExchangeTime'), dict( brightness_delta=10, contrast_range=( 0.8, 1.2, ), hue_delta=10, saturation_range=( 0.8, 1.2, ), type='MultiImgPhotoMetricDistortion'), dict(type='MultiImgPackSegInputs'), ], type='LEVIR_CD_Dataset'), num_workers=8, persistent_workers=True, sampler=dict(shuffle=True, type='DefaultSampler')) train_pipeline = [ dict(type='MultiImgLoadImageFromFile'), dict(type='MultiImgLoadAnnotations'), dict( degree=( -20, 20, ), flip_prob=0.5, rotate_prob=0.5, type='MultiImgRandomRotFlip'), dict( cat_max_ratio=0.75, crop_size=( 256, 256, ), type='MultiImgRandomCrop'), dict(prob=0.5, type='MultiImgExchangeTime'), dict( brightness_delta=10, contrast_range=( 0.8, 1.2, ), hue_delta=10, saturation_range=( 0.8, 1.2, ), type='MultiImgPhotoMetricDistortion'), dict(type='MultiImgPackSegInputs'), ] val_cfg = None val_dataloader = None val_evaluator = None val_pipeline = [ dict(type='MultiImgLoadImageFromFile'), dict(keep_ratio=True, scale=( 256, 256, ), type='MultiImgResize'), dict(type='MultiImgLoadAnnotations'), dict(type='MultiImgPackSegInputs'), ] vis_backends = [ dict(type='CDLocalVisBackend'), ] visualizer = dict( alpha=1.0, name='visualizer', save_dir='./predict/show', type='CDLocalVisualizer', vis_backends=[ dict(type='CDLocalVisBackend'), ]) work_dir = './predict'

2024/09/18 15:24:35 - mmengine - INFO - per class results: 2024/09/18 15:24:35 - mmengine - INFO - +-----------+--------+-----------+--------+-------+-------+ | Class | Fscore | Precision | Recall | IoU | Acc | +-----------+--------+-----------+--------+-------+-------+ | unchanged | 98.48 | 98.13 | 98.83 | 97.0 | 98.83 | | changed | 87.49 | 90.12 | 85.02 | 77.77 | 85.02 | +-----------+--------+-----------+--------+-------+-------+ 2024/09/18 15:24:35 - mmengine - INFO - Epoch(test) [935/935] aAcc: 97.2900 mFscore: 92.9900 mPrecision: 94.1300 mRecall: 91.9200 mIoU: 87.3900 mAcc: 91.9200 data_time: 0.0190 time: 0.1445 我想问一下这个差异是数据集裁切的差异吗?如果是,能不能请您提供一下您裁剪后的test数据集。

DotWang commented 3 days ago

@vaelaogudong 可能,从你这个log我看不出来差别,这是我的LEVIR-CD裁剪代码,看看和你的有什么差别

import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import scipy.io as scio
from PIL import Image
from skimage import io
from tqdm import tqdm

# Source directories of the full-size LEVIR-CD test split
# (pre-change images, post-change images, change labels).
a_img_path = r'D:\dataset\levir_cd\test\A'
b_img_path = r'D:\dataset\levir_cd\test\B'
label_path = r'D:\dataset\levir_cd\test\label'

# Destination directories that receive the cropped tiles.
part_a_img_path = r'D:\dataset\levir_cd\levir_patch_dataset\test\A'
part_b_img_path = r'D:\dataset\levir_cd\levir_patch_dataset\test\B'
part_label_path = r'D:\dataset\levir_cd\levir_patch_dataset\test\label'

IMAGE_DIR = a_img_path
LABEL_DIR = label_path

IMG_FILE_EXT = 'png'

# Tile size as (rows, cols).
BLOCK_SZ = (256, 256)

# NOTE(review): the original post referenced BLOCK_MIN_OVERLAP without a
# visible definition. 0 (non-overlapping tiles) is assumed here -- confirm
# against the author's actual script.
BLOCK_MIN_OVERLAP = 0


def tile_starts(image_len, block_len, min_overlap=0):
    """Return evenly spaced tile start offsets along one image axis.

    The offsets run from 0 to ``image_len - block_len`` inclusive, so the
    last tile ends exactly at the image border. The number of tiles is the
    smallest count whose stride does not exceed ``block_len - min_overlap``.

    Args:
        image_len: Length of the image along this axis, in pixels.
        block_len: Length of one tile along this axis, in pixels.
        min_overlap: Minimum overlap between neighbouring tiles, in pixels.

    Returns:
        1-D integer numpy array of start offsets.

    Raises:
        ValueError: If the tile is larger than the image, or if
            ``min_overlap`` is not smaller than ``block_len``.
    """
    last_start = image_len - block_len
    if last_start < 0:
        raise ValueError(
            'block length %d exceeds image length %d' % (block_len, image_len))
    stride = block_len - min_overlap
    if stride <= 0:
        raise ValueError('min_overlap must be smaller than the block length')
    count = int(np.ceil(last_start / stride)) + 1
    return np.linspace(0, last_start, count, endpoint=True).astype('int')


def main():
    """Crop every A/B/label triplet into BLOCK_SZ tiles and save them as PNG.

    Tiles are written to the ``part_*`` directories and named
    ``<base_name>_<index>.png``, with the index counting tiles in row-major
    order within each source image.
    """
    for out_dir in (part_a_img_path, part_b_img_path, part_label_path):
        os.makedirs(out_dir, exist_ok=True)

    wildcard_image = '*.%s' % (IMG_FILE_EXT)
    # Sorted so that processing order is reproducible across runs.
    img_paths = sorted(glob(os.path.join(IMAGE_DIR, wildcard_image)))

    for img_path in tqdm(img_paths):
        # Platform-independent replacement for splitting on '\\'.
        base_name = os.path.splitext(os.path.basename(img_path))[0]
        file_name = base_name + '.png'

        # NOTE(review): the reader call was lost in the original post;
        # skimage's io.imread is assumed because the file imports it.
        a_img = np.array(io.imread(os.path.join(a_img_path, file_name)))
        b_img = np.array(io.imread(os.path.join(b_img_path, file_name)))
        label = np.array(io.imread(os.path.join(label_path, file_name)))

        # The original used an undefined IMG_SZ; the size is taken from the
        # loaded label array instead.
        rows = tile_starts(label.shape[0], BLOCK_SZ[0], BLOCK_MIN_OVERLAP)
        cols = tile_starts(label.shape[1], BLOCK_SZ[1], BLOCK_MIN_OVERLAP)

        part_ind = 0
        for r_start in rows:
            for c_start in cols:
                r_end = r_start + BLOCK_SZ[0]
                c_end = c_start + BLOCK_SZ[1]

                part_name = base_name + '_' + str(part_ind) + '.png'

                curr_a_img = Image.fromarray(a_img[r_start:r_end, c_start:c_end, :])
                curr_b_img = Image.fromarray(b_img[r_start:r_end, c_start:c_end, :])
                curr_gray = Image.fromarray(label[r_start:r_end, c_start:c_end])

                curr_a_img.save(os.path.join(part_a_img_path, part_name))
                curr_b_img.save(os.path.join(part_b_img_path, part_name))
                curr_gray.save(os.path.join(part_label_path, part_name))

                part_ind += 1

    print('Levir tiles processing finished!')


if __name__ == '__main__':
    main()
vaelaogudong commented 3 days ago
