ViTAE-Transformer / MTP

The official repo for [JSTARS'24] "MTP: Advancing Remote Sensing Foundation Model via Multi-Task Pretraining"
MIT License
151 stars 7 forks source link

变化检测test精度问题 #25

Closed vaelaogudong closed 3 days ago

vaelaogudong commented 3 days ago

作者您好,我在用您的权重做测试时,精度并没有达到您log里的那么高,我已经将LEVIR数据集裁剪成256*256并且删除了完全无变化及 特别小变化的影像对,以下是我的测试log: 2024/09/18 15:22:10 - mmengine - INFO - Config: crop_size = ( 256, 256, ) data_preprocessor = dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, 123.675, 116.28, 103.53, ], pad_val=0, seg_pad_val=255, size_divisor=32, std=[ 58.395, 57.12, 57.375, 58.395, 57.12, 57.375, ], test_cfg=dict(size_divisor=32), type='DualInputSegDataPreProcessor') data_root = '/home/zzjx/dataset/LEVIR-CD/256/levir_format' dataset_type = 'LEVIR_CD_Dataset' default_hooks = dict( checkpoint=dict( by_epoch=True, interval=30, save_best='mIoU', type='CheckpointHook'), logger=dict(interval=50, log_metric_by_epoch=True, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), sampler_seed=dict(type='DistSamplerSeedHook'), timer=dict(type='IterTimerHook'), visualization=dict( draw=True, img_shape=( 256, 256, 3, ), interval=1, type='CDVisualizationHook')) default_scope = 'opencd' env_cfg = dict( cudnn_benchmark=True, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) launcher = 'none' load_from = 'weights/levir-rvsa-l-mae-mtp-epoch_150.pth' log_level = 'INFO' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50) model = dict( backbone=dict( attn_drop_rate=0.0, depth=24, drop_path_rate=0.3, drop_rate=0.0, embed_dim=1024, frozen_stages=-1, img_size=256, interval=6, mlp_ratio=4, num_heads=16, out_indices=[ 7, 11, 15, 23, ], patch_size=16, pretrained= '/work/share/achk2o1zg1/diwang22/work_dir/multitask_pretrain/pretrain/avg/with_background/vit_l_rvsa_448_mae_samrs_mtp_three/last_vit_l_rvsa_ss_is_rd_pretrn_model_encoder.pth', qk_scale=None, qkv_bias=True, type='RVSA_MTP', use_abs_pos_emb=True, use_checkpoint=False), data_preprocessor=dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, 123.675, 116.28, 103.53, ], pad_val=0, seg_pad_val=255, size_divisor=32, std=[ 58.395, 57.12, 57.375, 58.395, 57.12, 57.375, ], 
test_cfg=dict(size_divisor=32), type='DualInputSegDataPreProcessor'), decode_head=dict( align_corners=False, attention_type=None, center=False, channels=64, decoder_channels=[ 512, 256, 128, 64, ], dropout_ratio=0.1, encoder_channels=[ 1024, 1024, 1024, 1024, ], ignore_index=255, in_channels=[ 1024, 1024, 1024, 1024, ], in_index=[ 0, 1, 2, 3, ], loss_decode=dict( loss_weight=1.0, type='mmseg.CrossEntropyLoss', use_sigmoid=False), n_blocks=4, norm_cfg=dict(requires_grad=True, type='SyncBN'), num_classes=2, type='UNetHead', use_batchnorm=True), neck=dict( out_indices=( 0, 1, 2, 3, ), policy='abs_diff', type='FeatureFusionNeck'), test_cfg=dict(mode='whole'), train_cfg=dict(), type='SiamEncoderDecoder') norm_cfg = dict(requires_grad=True, type='SyncBN') optim_wrapper = dict( constructor='LayerDecayOptimizerConstructor_ViT', optimizer=dict( betas=( 0.9, 0.999, ), lr=6e-05, type='AdamW', weight_decay=0.05), paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=24)) param_scheduler = [ dict( begin=0, by_epoch=True, convert_to_iter_based=True, end=5, start_factor=1e-06, type='LinearLR'), dict(T_max=145, begin=5, by_epoch=True, end=150, type='CosineAnnealingLR'), ] resume = False test_cfg = dict(type='TestLoop') test_dataloader = dict( batch_size=1, dataset=dict( data_prefix=dict( img_path_from='test/A', img_path_to='test/B', seg_map_path='test/label'), data_root='/home/zzjx/dataset/LEVIR-CD/256/levir_format', pipeline=[ dict(type='MultiImgLoadImageFromFile'), dict(type='MultiImgLoadAnnotations'), dict(type='MultiImgPackSegInputs'), ], type='LEVIR_CD_Dataset'), num_workers=8, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( iou_metrics=[ 'mFscore', 'mIoU', ], type='mmseg.IoUMetric') test_pipeline = [ dict(type='MultiImgLoadImageFromFile'), dict(type='MultiImgLoadAnnotations'), dict(type='MultiImgPackSegInputs'), ] train_cfg = dict(max_epochs=150, type='EpochBasedTrainLoop', val_interval=30) train_dataloader = dict( 
batch_size=4, dataset=dict( data_prefix=dict( img_path_from='train/A', img_path_to='train/B', seg_map_path='train/label'), data_root='/home/zzjx/dataset/LEVIR-CD/256/levir_format', pipeline=[ dict(type='MultiImgLoadImageFromFile'), dict(type='MultiImgLoadAnnotations'), dict( degree=( -20, 20, ), flip_prob=0.5, rotate_prob=0.5, type='MultiImgRandomRotFlip'), dict( cat_max_ratio=0.75, crop_size=( 256, 256, ), type='MultiImgRandomCrop'), dict(prob=0.5, type='MultiImgExchangeTime'), dict( brightness_delta=10, contrast_range=( 0.8, 1.2, ), hue_delta=10, saturation_range=( 0.8, 1.2, ), type='MultiImgPhotoMetricDistortion'), dict(type='MultiImgPackSegInputs'), ], type='LEVIR_CD_Dataset'), num_workers=8, persistent_workers=True, sampler=dict(shuffle=True, type='DefaultSampler')) train_pipeline = [ dict(type='MultiImgLoadImageFromFile'), dict(type='MultiImgLoadAnnotations'), dict( degree=( -20, 20, ), flip_prob=0.5, rotate_prob=0.5, type='MultiImgRandomRotFlip'), dict( cat_max_ratio=0.75, crop_size=( 256, 256, ), type='MultiImgRandomCrop'), dict(prob=0.5, type='MultiImgExchangeTime'), dict( brightness_delta=10, contrast_range=( 0.8, 1.2, ), hue_delta=10, saturation_range=( 0.8, 1.2, ), type='MultiImgPhotoMetricDistortion'), dict(type='MultiImgPackSegInputs'), ] val_cfg = None val_dataloader = None val_evaluator = None val_pipeline = [ dict(type='MultiImgLoadImageFromFile'), dict(keep_ratio=True, scale=( 256, 256, ), type='MultiImgResize'), dict(type='MultiImgLoadAnnotations'), dict(type='MultiImgPackSegInputs'), ] vis_backends = [ dict(type='CDLocalVisBackend'), ] visualizer = dict( alpha=1.0, name='visualizer', save_dir='./predict/show', type='CDLocalVisualizer', vis_backends=[ dict(type='CDLocalVisBackend'), ]) work_dir = './predict'

2024/09/18 15:22:17 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used. 2024/09/18 15:22:17 - mmengine - INFO - Hooks will be executed in the following order: before_run: (VERY_HIGH ) RuntimeInfoHook
(BELOW_NORMAL) LoggerHook


before_train: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(VERY_LOW ) CheckpointHook


before_train_epoch: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DistSamplerSeedHook


before_train_iter: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook


after_train_iter: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) CDVisualizationHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


after_train_epoch: (NORMAL ) IterTimerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


before_val: (VERY_HIGH ) RuntimeInfoHook


before_val_epoch: (NORMAL ) IterTimerHook


before_val_iter: (NORMAL ) IterTimerHook


after_val_iter: (NORMAL ) IterTimerHook
(NORMAL ) CDVisualizationHook
(BELOW_NORMAL) LoggerHook


after_val_epoch: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


after_val: (VERY_HIGH ) RuntimeInfoHook


after_train: (VERY_HIGH ) RuntimeInfoHook
(VERY_LOW ) CheckpointHook


before_test: (VERY_HIGH ) RuntimeInfoHook


before_test_epoch: (NORMAL ) IterTimerHook


before_test_iter: (NORMAL ) IterTimerHook


after_test_iter: (NORMAL ) IterTimerHook
(NORMAL ) CDVisualizationHook
(BELOW_NORMAL) LoggerHook


after_test_epoch: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook


after_test: (VERY_HIGH ) RuntimeInfoHook


after_run: (BELOW_NORMAL) LoggerHook


2024/09/18 15:22:18 - mmengine - WARNING - The prefix is not set in metric class IoUMetric. 2024/09/18 15:22:20 - mmengine - INFO - Load checkpoint from weights/levir-rvsa-l-mae-mtp-epoch_150.pth 2024/09/18 15:22:29 - mmengine - INFO - Epoch(test) [ 50/935] eta: 0:02:42 time: 0.1838 data_time: 0.0275 memory: 4641
2024/09/18 15:22:37 - mmengine - INFO - Epoch(test) [100/935] eta: 0:02:18 time: 0.1468 data_time: 0.0189 memory: 1459
2024/09/18 15:22:44 - mmengine - INFO - Epoch(test) [150/935] eta: 0:02:03 time: 0.1418 data_time: 0.0182 memory: 1459
2024/09/18 15:22:51 - mmengine - INFO - Epoch(test) [200/935] eta: 0:01:53 time: 0.1459 data_time: 0.0201 memory: 1459
2024/09/18 15:22:58 - mmengine - INFO - Epoch(test) [250/935] eta: 0:01:44 time: 0.1426 data_time: 0.0188 memory: 1459
2024/09/18 15:23:05 - mmengine - INFO - Epoch(test) [300/935] eta: 0:01:35 time: 0.1421 data_time: 0.0205 memory: 1459
2024/09/18 15:23:12 - mmengine - INFO - Epoch(test) [350/935] eta: 0:01:27 time: 0.1408 data_time: 0.0178 memory: 1459
2024/09/18 15:23:19 - mmengine - INFO - Epoch(test) [400/935] eta: 0:01:19 time: 0.1414 data_time: 0.0179 memory: 1459
2024/09/18 15:23:27 - mmengine - INFO - Epoch(test) [450/935] eta: 0:01:11 time: 0.1435 data_time: 0.0180 memory: 1459
2024/09/18 15:23:34 - mmengine - INFO - Epoch(test) [500/935] eta: 0:01:04 time: 0.1428 data_time: 0.0180 memory: 1459
2024/09/18 15:23:41 - mmengine - INFO - Epoch(test) [550/935] eta: 0:00:56 time: 0.1400 data_time: 0.0178 memory: 1459
2024/09/18 15:23:48 - mmengine - INFO - Epoch(test) [600/935] eta: 0:00:48 time: 0.1433 data_time: 0.0205 memory: 1459
2024/09/18 15:23:55 - mmengine - INFO - Epoch(test) [650/935] eta: 0:00:41 time: 0.1403 data_time: 0.0177 memory: 1459
2024/09/18 15:24:02 - mmengine - INFO - Epoch(test) [700/935] eta: 0:00:34 time: 0.1400 data_time: 0.0176 memory: 1459
2024/09/18 15:24:09 - mmengine - INFO - Epoch(test) [750/935] eta: 0:00:26 time: 0.1402 data_time: 0.0177 memory: 1459
2024/09/18 15:24:16 - mmengine - INFO - Epoch(test) [800/935] eta: 0:00:19 time: 0.1399 data_time: 0.0176 memory: 1459
2024/09/18 15:24:23 - mmengine - INFO - Epoch(test) [850/935] eta: 0:00:12 time: 0.1464 data_time: 0.0215 memory: 1459
2024/09/18 15:24:30 - mmengine - INFO - Epoch(test) [900/935] eta: 0:00:05 time: 0.1408 data_time: 0.0177 memory: 1459
2024/09/18 15:24:35 - mmengine - INFO - per class results: 2024/09/18 15:24:35 - mmengine - INFO - +-----------+--------+-----------+--------+-------+-------+ | Class | Fscore | Precision | Recall | IoU | Acc | +-----------+--------+-----------+--------+-------+-------+ | unchanged | 98.48 | 98.13 | 98.83 | 97.0 | 98.83 | | changed | 87.49 | 90.12 | 85.02 | 77.77 | 85.02 | +-----------+--------+-----------+--------+-------+-------+ 2024/09/18 15:24:35 - mmengine - INFO - Epoch(test) [935/935] aAcc: 97.2900 mFscore: 92.9900 mPrecision: 94.1300 mRecall: 91.9200 mIoU: 87.3900 mAcc: 91.9200 data_time: 0.0190 time: 0.1445 我想问一下这个差异是数据集裁切的差异吗?如果是,能不能请您提供一下您裁剪后的test数据集。

DotWang commented 3 days ago

@vaelaogudong 可能,从你这个log我看不出来差别。这是我的LEVIR-CD裁剪代码,看看和你的有什么差别:

import os
import numpy as np
import scipy.io as scio
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image
from skimage import io
from glob import glob

# Crop every LEVIR-CD test pair (pre-change image A, post-change image B) and
# its label into fixed-size tiles, writing the tiles to the patch directories.

# Source directories holding the full-size images and labels.
a_img_path = r'D:\dataset\levir_cd\test\A'
b_img_path = r'D:\dataset\levir_cd\test\B'
label_path = r'D:\dataset\levir_cd\test\label'
# Destination directories for the cropped tiles.
part_a_img_path = r'D:\dataset\levir_cd\levir_patch_dataset\test\A'
part_b_img_path = r'D:\dataset\levir_cd\levir_patch_dataset\test\B'
part_label_path = r'D:\dataset\levir_cd\levir_patch_dataset\test\label'

for out_dir in (part_a_img_path, part_b_img_path, part_label_path):
    os.makedirs(out_dir, exist_ok=True)

IMAGE_DIR = a_img_path
LABEL_DIR = label_path

IMG_FILE_EXT = 'png'

# Tile size as (rows, cols) and the minimum overlap between adjacent tiles.
BLOCK_SZ = (256, 256)
BLOCK_MIN_OVERLAP = 0

wildcard_image = '*.%s' % (IMG_FILE_EXT)
# glob() returns files in arbitrary order; sort so the run is deterministic.
img_paths = sorted(glob(os.path.join(IMAGE_DIR, wildcard_image)))
lab_paths = sorted(glob(os.path.join(LABEL_DIR, wildcard_image)))

# Drive the loop from the label files and derive the shared base name from
# each label path itself, so the A/B/label triple is always matched by name
# instead of by position in two independently globbed lists.
for lab_file in tqdm(lab_paths):

    # Portable "file name without extension": works with both '/' and '\\'
    # separators and keeps any extra dots inside the file name.
    base_name = os.path.splitext(os.path.basename(lab_file))[0]
    src_name = base_name + '.' + IMG_FILE_EXT

    # Load each image into an array and close the file handle right away.
    with Image.open(os.path.join(a_img_path, src_name)) as src:
        a_img = np.array(src)
    with Image.open(os.path.join(b_img_path, src_name)) as src:
        b_img = np.array(src)
    with Image.open(os.path.join(label_path, src_name)) as src:
        label = np.array(src)

    # Tile origins: evenly spaced from 0 to the last valid start
    # (image size - block size). The count is ceil(end / stride) + 1, so the
    # tiles cover the whole image and overlap only when the image size is not
    # a multiple of the stride (e.g. side 1024, block 256 -> 0, 256, 512, 768).
    IMG_SZ = label.shape[:2]
    yEnd, xEnd = np.subtract(IMG_SZ, BLOCK_SZ)
    x = np.linspace(0, xEnd, int(np.ceil(xEnd / (BLOCK_SZ[1] - BLOCK_MIN_OVERLAP))) + 1, endpoint=True).astype('int')
    y = np.linspace(0, yEnd, int(np.ceil(yEnd / (BLOCK_SZ[0] - BLOCK_MIN_OVERLAP))) + 1, endpoint=True).astype('int')

    # Running tile index within this image, row-major over the tile grid.
    partInd = 0

    for rStart in y:
        for cStart in x:
            rEnd, cEnd = (rStart + BLOCK_SZ[0], cStart + BLOCK_SZ[1])

            # The A/B arrays are indexed with three axes, the label with two.
            curr_a_Img = a_img[rStart:rEnd, cStart:cEnd, :]
            curr_b_Img = b_img[rStart:rEnd, cStart:cEnd, :]
            currGray = label[rStart:rEnd, cStart:cEnd]

            # The same "<base>_<index>.png" name is used in all three output
            # directories so the tiles stay aligned.
            tile_name = base_name + '_' + str(partInd) + '.png'

            Image.fromarray(curr_a_Img).save(os.path.join(part_a_img_path, tile_name))
            Image.fromarray(curr_b_Img).save(os.path.join(part_b_img_path, tile_name))
            Image.fromarray(currGray).save(os.path.join(part_label_path, tile_name))

            partInd += 1

print('Levir tiles processing finished!')
vaelaogudong commented 3 days ago

感谢,确实是这样