Open dongxiaofei12 opened 1 week ago
Is there an issue with the new version?
How many cards did you use? What is the batch size? Please provide more training parameter settings.
The batch size we used is 16. In addition, the pre-trained model of the backbone should be loaded.
How many cards did you use? What is the batch size? Please provide more training parameter settings.
System environment: sys.platform: linux Python: 3.8.18 | packaged by conda-forge | (default, Oct 10 2023, 15:44:36) [GCC 12.3.0] CUDA available: True MUSA available: False numpy_random_seed: 0 GPU 0: NVIDIA GeForce RTX 3080 Ti CUDA_HOME: /usr NVCC: Cuda compilation tools, release 9.1, V9.1.8 GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0 PyTorch: 1.8.0+cu111 PyTorch compiling details: PyTorch built with:
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.8.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
TorchVision: 0.9.0+cu111 OpenCV: 4.10.0 MMEngine: 0.10.4
2024/10/15 13:48:48 - mmengine - INFO - Config: EfficientFormer_depth = dict( L=( 5, 5, 15, 10, ), S0=( 2, 2, 6, 4, ), S1=( 3, 3, 9, 6, ), S2=( 4, 4, 12, 8, )) EfficientFormer_expansion_ratios = dict( L=( 4, 4, ( 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, ), ( 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, ), ), S0=( 4, 4, ( 4, 3, 3, 3, 4, 4, ), ( 4, 3, 3, 4, ), ), S1=( 4, 4, ( 4, 4, 3, 3, 3, 3, 4, 4, 4, ), ( 4, 4, 3, 3, 4, 4, ), ), S2=( 4, 4, ( 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, ), ( 4, 4, 3, 3, 3, 3, 4, 4, ), )) EfficientFormer_width = dict( L=( 40, 80, 192, 384, ), S0=( 32, 48, 96, 176, ), S1=( 32, 48, 120, 224, ), S2=( 32, 64, 144, 288, )) crop_size = ( 256, 256, ) data = dict(samples_per_gpu=4, workers_per_gpu=8) data_preprocessor = dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, ], pad_val=0, seg_pad_val=255, size=( 256, 256, ), std=[ 58.395, 57.12, 57.375, ], type='SegDataPreProcessor') data_root = '../data/custom-tif-256' dataset_type = 'PascalVOCDataset' default_hooks = dict( checkpoint=dict(by_epoch=False, interval=16000, type='CheckpointHook'), logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), sampler_seed=dict(type='DistSamplerSeedHook'), timer=dict(type='IterTimerHook'), visualization=dict(type='SegVisualizationHook')) default_scope = 'mmseg' env_cfg = dict( cudnn_benchmark=True, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) evaluation = dict(interval=4000) find_unused_parameters = True img_ratios = [ 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, ] launcher = 'none' load_from = None log_level = 'INFO' log_processor = dict(by_epoch=False) lr_config = dict( delete=True, by_epoch=False, min_lr=0.0, policy='poly', power=1.0, warmup='linear', warmup_iters=1500, warmup_ratio=1e-06) model = dict( backbone=dict( init_cfg=dict( checkpoint= '/home/badwater/.cache/torch/hub/checkpoints/eformer_s2_450.pth', type='Pretrained'), style='pytorch', 
type='efficientformerv2_s2_feat'), data_preprocessor=dict( bgr_to_rgb=True, mean=[ 123.675, 116.28, 103.53, ], pad_val=0, seg_pad_val=255, size=( 256, 256, ), std=[ 58.395, 57.12, 57.375, ], type='SegDataPreProcessor'), decode_head=dict( align_corners=False, channels=256, dropout_ratio=0.1, dw_size=11, in_channels=[ 64, 144, 288, ], in_index=[ 1, 2, 3, ], is_dw=True, loss_decode=dict( loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False), module='RCA', neck_size=11, next_repeat=6, norm_cfg=dict(requires_grad=True, type='SyncBN'), num_classes=5, ratio=1, square_kernel_size=3, type='CGRSeg'), test_cfg=dict(mode='whole'), train_cfg=dict(), type='EncoderDecoder') norm_cfg = dict(requires_grad=True, type='SyncBN') optim_wrapper = dict( clip_grad=None, optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005), type='OptimWrapper') optimizer = dict( betas=( 0.9, 0.999, ), lr=0.00012, paramwise_cfg=dict( custom_keys=dict( absolute_pos_embed=dict(decay_mult=0.0), head=dict(lr_mult=10.0), norm=dict(decay_mult=0.0))), type='AdamW', weight_decay=0.01) param_scheduler = [ dict( begin=0, by_epoch=False, end=160000, eta_min=0.0001, power=0.9, type='PolyLR'), ] randomness = dict(seed=0) resume = False test_cfg = dict(type='TestLoop') test_dataloader = dict( batch_size=1, dataset=dict( ann_file='ImageSets/Segmentation/test.txt', data_prefix=dict( img_path='TIFFImages', seg_map_path='SegmentationClassPNG'), data_root='../data/custom-tif-256', pipeline=[ dict(type='LoadImageFromFile'), dict(keep_ratio=True, scale=( 2048, 512, ), type='Resize'), dict(type='LoadAnnotations'), dict(type='PackSegInputs'), ], type='PascalVOCDataset'), num_workers=4, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( iou_metrics=[ 'mIoU', ], type='IoUMetric') test_pipeline = [ dict(type='LoadImageFromFile'), dict(keep_ratio=True, scale=( 2048, 512, ), type='Resize'), dict(type='LoadAnnotations'), dict(type='PackSegInputs'), ] train_cfg = 
dict( max_iters=160000, type='IterBasedTrainLoop', val_interval=16000) train_dataloader = dict( batch_size=4, dataset=dict( ann_file='ImageSets/Segmentation/train.txt', data_prefix=dict( img_path='TIFFImages', seg_map_path='SegmentationClassPNG'), data_root='../data/custom-tif-256', pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations'), dict( keep_ratio=True, ratio_range=( 0.5, 2.0, ), scale=( 2048, 512, ), type='RandomResize'), dict( cat_max_ratio=0.75, crop_size=( 256, 256, ), type='RandomCrop'), dict(prob=0.5, type='RandomFlip'), dict(type='PhotoMetricDistortion'), dict(type='PackSegInputs'), ], type='PascalVOCDataset'), num_workers=4, persistent_workers=True, sampler=dict(shuffle=True, type='DefaultSampler')) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations'), dict( keep_ratio=True, ratio_range=( 0.5, 2.0, ), scale=( 2048, 512, ), type='RandomResize'), dict(cat_max_ratio=0.75, crop_size=( 256, 256, ), type='RandomCrop'), dict(prob=0.5, type='RandomFlip'), dict(type='PhotoMetricDistortion'), dict(type='PackSegInputs'), ] tta_model = dict(type='SegTTAModel') tta_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict( transforms=[ [ dict(keep_ratio=True, scale_factor=0.5, type='Resize'), dict(keep_ratio=True, scale_factor=0.75, type='Resize'), dict(keep_ratio=True, scale_factor=1.0, type='Resize'), dict(keep_ratio=True, scale_factor=1.25, type='Resize'), dict(keep_ratio=True, scale_factor=1.5, type='Resize'), dict(keep_ratio=True, scale_factor=1.75, type='Resize'), ], [ dict(direction='horizontal', prob=0.0, type='RandomFlip'), dict(direction='horizontal', prob=1.0, type='RandomFlip'), ], [ dict(type='LoadAnnotations'), ], [ dict(type='PackSegInputs'), ], ], type='TestTimeAug'), ] val_cfg = dict(type='ValLoop') val_dataloader = dict( batch_size=1, dataset=dict( ann_file='ImageSets/Segmentation/val.txt', data_prefix=dict( img_path='TIFFImages', seg_map_path='SegmentationClassPNG'), 
data_root='../data/custom-tif-256', pipeline=[ dict(type='LoadImageFromFile'), dict(keep_ratio=True, scale=( 2048, 512, ), type='Resize'), dict(type='LoadAnnotations'), dict(type='PackSegInputs'), ], type='PascalVOCDataset'), num_workers=4, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict( iou_metrics=[ 'mIoU', ], type='IoUMetric') vis_backends = [ dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend'), ] visualizer = dict( name='visualizer', type='SegLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend'), ]) work_dir = './work_dirs/cgrseg-b-256×256'
2024/10/15 13:48:54 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
2024/10/15 13:48:54 - mmengine - WARNING - The prefix is not set in metric class IoUMetric. Name of parameter - Initialization information
This is my training log.
You are only using 1 GPU, so samples_per_gpu should be 16. The batch size should be 16, but you are currently using a batch size of 4.
You only use 1 GPU, the samples_per_gpu should be 16. The batch size should be 16. Now, you use bs 4. I modified the batch_size according to the new version, but the training results are still unsatisfactory.
When I started training, a large number of missing keys appeared, as shown in the image above. Is this normal? The mmsegmentation version I used was 1.2.2.