open-mmlab / mmpretrain

OpenMMLab Pre-training Toolbox and Benchmark
https://mmpretrain.readthedocs.io/en/latest/
Apache License 2.0
3.38k stars 1.05k forks source link

[Feature] How to modify the input size of the network when training the backbone network in mmpretrain, for example, how to change the input of ResNet (224,224) to (640,640)? #1746

Open baiguosummer opened 1 year ago

baiguosummer commented 1 year ago

Describe the feature

This is the configuration I modified, mainly to change it to 640. Is this modification correct?

train_pipeline = [ dict(type='LoadImageFromFile'),

dict(type='RandomResizedCrop', scale=224),

dict(type='RandomResizedCrop', scale=640),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),

]

test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=640, edge='short'),

dict(type='CenterCrop', crop_size=224),

dict(type='CenterCrop', crop_size=640),
dict(type='PackInputs'),

]

Here is the configuration of my model: (torch) panda@amd:/media/panda/nvme2T/pycharm/openMMlab$ bash mmpretrain/sh/print_config.sh model = dict( type='ImageClassifier', backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(3, ), style='pytorch', frozen_stages=-1), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', num_classes=6, in_channels=2048, loss=dict(type='CrossEntropyLoss', loss_weight=1.0), topk=( 1, 5, )), init_cfg=dict( type='Pretrained', checkpoint= 'mmpretrain/checkpoints/resnet50_8xb32_in1k_20210831-ea4938fc.pth')) dataset_type = 'CustomDataset' data_preprocessor = dict( num_classes=6, mean=[ 123.675, 116.28, 103.53, ], std=[ 58.395, 57.12, 57.375, ], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=640), dict(type='RandomFlip', prob=0.5, direction='horizontal'), dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=640, edge='short'), dict(type='CenterCrop', crop_size=640), dict(type='PackInputs'), ] train_dataloader = dict( batch_size=8, num_workers=8, dataset=dict( type='CustomDataset', metainfo=dict( classes=[ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ], palette=[ ( 20, 220, 20, ), ( 20, 20, 240, ), ( 220, 20, 20, ), ( 40, 100, 150, ), ( 200, 50, 120, ), ( 200, 150, 150, ), ]), data_prefix='data/mmpretrain/pretrain_resnet_c6/', with_label=True, pipeline=[ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=640), dict(type='RandomFlip', prob=0.5, direction='horizontal'), dict(type='PackInputs'), ]), sampler=dict(type='DefaultSampler', shuffle=True)) val_dataloader = dict( batch_size=32, num_workers=5, dataset=dict( type='CustomDataset', metainfo=dict( classes=[ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ], palette=[ ( 20, 220, 20, ), ( 20, 20, 240, ), ( 220, 20, 20, ), ( 40, 100, 150, ), ( 200, 50, 120, ), ( 200, 150, 150, ), ]), 
data_prefix='data/mmpretrain/pretrain_resnet_c6/', with_label=True, pipeline=[ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=640, edge='short'), dict(type='CenterCrop', crop_size=640), dict(type='PackInputs'), ]), sampler=dict(type='DefaultSampler', shuffle=False)) val_evaluator = dict( type='Accuracy', topk=( 1, 5, )) test_dataloader = dict( batch_size=32, num_workers=5, dataset=dict( type='CustomDataset', metainfo=dict( classes=[ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ], palette=[ ( 20, 220, 20, ), ( 20, 20, 240, ), ( 220, 20, 20, ), ( 40, 100, 150, ), ( 200, 50, 120, ), ( 200, 150, 150, ), ]), data_prefix='data/mmpretrain/pretrain_resnet_c6/', with_label=True, pipeline=[ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=640, edge='short'), dict(type='CenterCrop', crop_size=640), dict(type='PackInputs'), ]), sampler=dict(type='DefaultSampler', shuffle=False)) test_evaluator = dict( type='Accuracy', topk=( 1, 5, )) optim_wrapper = dict( optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)) param_scheduler = dict( type='MultiStepLR', by_epoch=True, milestones=[ 30, 60, 90, ], gamma=0.1) train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1, val_begin=1) val_cfg = dict() test_cfg = dict() auto_scale_lr = dict(base_batch_size=256) default_scope = 'mmpretrain' default_hooks = dict( timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=10), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict( type='CheckpointHook', interval=1, max_keep_ckpts=4, save_best='auto'), sampler_seed=dict(type='DistSamplerSeedHook'), visualization=dict(type='VisualizationHook', enable=False)) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl')) vis_backends = [ dict(type='LocalVisBackend'), ] visualizer = dict( type='UniversalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), 
dict(type='TensorboardVisBackend'), ]) log_level = 'INFO' load_from = 'mmpretrain/checkpoints/resnet50_8xb32_in1k_20210831-ea4938fc.pth' resume = False randomness = dict(seed=None, deterministic=False) max_epochs = 100 data_root = 'data/mmpretrain/pretrain_resnet_c6/' work_dir = 'mmpretrain/work_train_dir/resnet50_in1k_c6_pretrain_data' train_batch_size_per_gpu = 8 train_num_workers = 8 save_epoch_intervals = 1 class_name = [ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ] num_classes = 6 metainfo = dict( classes=[ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ], palette=[ ( 20, 220, 20, ), ( 20, 20, 240, ), ( 220, 20, 20, ), ( 40, 100, 150, ), ( 200, 50, 120, ), ( 200, 150, 150, ), ])

Will you implement it?

Ezra-Yu commented 1 year ago

Here you have changed the input image size to 640 × 640.

train_pipeline = [
dict(type='LoadImageFromFile'),
# dict(type='RandomResizedCrop', scale=224),   #
dict(type='RandomResizedCrop', scale=640),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackInputs'),
]
baiguosummer commented 1 year ago

Do I need to change it here? test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='ResizeEdge', scale=256, edge='short'),

dict(type='ResizeEdge', scale=640, edge='short'),

dict(type='CenterCrop', crop_size=224),
# dict(type='CenterCrop', crop_size=640),
dict(type='PackInputs'),

]

Ezra-Yu commented 1 year ago

just:

test_pipeline = [
dict(type='LoadImageFromFile'),
# dict(type='ResizeEdge', scale=640, edge='short'),
# dict(type='CenterCrop', crop_size=640),
dict(type='PackInputs'),
]
baiguosummer commented 1 year ago

(torch) panda@amd:/media/panda/nvme2T/pycharm/openMMlab$ bash mmpretrain/sh/4model_train.sh 08/07 12:20:51 - mmengine - INFO -

System environment: sys.platform: linux Python: 3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0] CUDA available: True numpy_random_seed: 710079749 GPU 0: NVIDIA GeForce RTX 3080 CUDA_HOME: /usr/local/cuda-11.3 NVCC: Cuda compilation tools, release 11.3, V11.3.58 GCC: gcc (Ubuntu 9.5.0-1ubuntu1~22.04) 9.5.0 PyTorch: 1.10.1 PyTorch compiling details: PyTorch built with:

Runtime environment: cudnn_benchmark: False mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0} dist_cfg: {'backend': 'nccl'} seed: 710079749 deterministic: False Distributed launcher: none Distributed training: False GPU number: 1

08/07 12:20:51 - mmengine - INFO - Config: model = dict( type='ImageClassifier', backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(3, ), style='pytorch', frozen_stages=-1), neck=dict(type='GlobalAveragePooling'), head=dict( type='LinearClsHead', num_classes=6, in_channels=2048, loss=dict(type='CrossEntropyLoss', loss_weight=1.0), topk=( 1, 5, )), init_cfg=dict( type='Pretrained', checkpoint= 'mmpretrain/checkpoints/resnet50_8xb32_in1k_20210831-ea4938fc.pth')) dataset_type = 'CustomDataset' data_preprocessor = dict( num_classes=6, mean=[ 123.675, 116.28, 103.53, ], std=[ 58.395, 57.12, 57.375, ], to_rgb=True) train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=640), dict(type='RandomFlip', prob=0.5, direction='horizontal'), dict(type='PackInputs'), ] test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='PackInputs'), ] train_dataloader = dict( pin_memory=True, persistent_workers=True, collate_fn=dict(type='default_collate'), batch_size=8, num_workers=8, dataset=dict( type='CustomDataset', metainfo=dict( classes=[ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ], palette=[ ( 20, 220, 20, ), ( 20, 20, 240, ), ( 220, 20, 20, ), ( 40, 100, 150, ), ( 200, 50, 120, ), ( 200, 150, 150, ), ]), data_prefix='data/mmpretrain/pretrain_c6/', with_label=True, pipeline=[ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=640), dict(type='RandomFlip', prob=0.5, direction='horizontal'), dict(type='PackInputs'), ]), sampler=dict(type='DefaultSampler', shuffle=True)) val_dataloader = dict( pin_memory=True, persistent_workers=True, collate_fn=dict(type='default_collate'), batch_size=32, num_workers=5, dataset=dict( type='CustomDataset', metainfo=dict( classes=[ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ], palette=[ ( 20, 220, 20, ), ( 20, 20, 240, ), ( 220, 20, 20, ), ( 40, 100, 150, ), ( 200, 50, 120, ), ( 200, 150, 150, ), ]), data_prefix='data/mmpretrain/pretrain_c6/', 
with_label=True, pipeline=[ dict(type='LoadImageFromFile'), dict(type='PackInputs'), ]), sampler=dict(type='DefaultSampler', shuffle=False)) val_evaluator = dict( type='Accuracy', topk=( 1, 5, )) test_dataloader = dict( pin_memory=True, persistent_workers=True, collate_fn=dict(type='default_collate'), batch_size=32, num_workers=5, dataset=dict( type='CustomDataset', metainfo=dict( classes=[ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ], palette=[ ( 20, 220, 20, ), ( 20, 20, 240, ), ( 220, 20, 20, ), ( 40, 100, 150, ), ( 200, 50, 120, ), ( 200, 150, 150, ), ]), data_prefix='data/mmpretrain/pretrain_c6/', with_label=True, pipeline=[ dict(type='LoadImageFromFile'), dict(type='PackInputs'), ]), sampler=dict(type='DefaultSampler', shuffle=False)) test_evaluator = dict( type='Accuracy', topk=( 1, 5, )) optim_wrapper = dict( optimizer=dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001)) param_scheduler = dict( type='MultiStepLR', by_epoch=True, milestones=[ 30, 60, 90, ], gamma=0.1) train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1, val_begin=1) val_cfg = dict() test_cfg = dict() auto_scale_lr = dict(base_batch_size=256) default_scope = 'mmpretrain' default_hooks = dict( timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=10), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict( type='CheckpointHook', interval=1, max_keep_ckpts=4, save_best='auto'), sampler_seed=dict(type='DistSamplerSeedHook'), visualization=dict(type='VisualizationHook', enable=False)) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl')) vis_backends = [ dict(type='LocalVisBackend'), ] visualizer = dict( type='UniversalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend'), ]) log_level = 'INFO' load_from = 'mmpretrain/checkpoints/resnet50_8xb32_in1k_20210831-ea4938fc.pth' resume = False randomness = dict(seed=None, 
deterministic=False) max_epochs = 100 data_root = 'data/mmpretrain/pretrain_c6/' work_dir = 'mmpretrain/work_train_dir/resnet50_in1k_c6_pretrain_data' train_batch_size_per_gpu = 8 train_num_workers = 8 save_epoch_intervals = 1 class_name = [ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ] num_classes = 6 metainfo = dict( classes=[ 'bulldozer', 'car', 'excavator', 'hole', 'person', 'truck', ], palette=[ ( 20, 220, 20, ), ( 20, 20, 240, ), ( 220, 20, 20, ), ( 40, 100, 150, ), ( 200, 50, 120, ), ( 200, 150, 150, ), ]) launcher = 'none'

08/07 12:20:54 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used. 08/07 12:20:54 - mmengine - INFO - Hooks will be executed in the following order: before_run: (VERY_HIGH ) RuntimeInfoHook
(BELOW_NORMAL) LoggerHook


before_train: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(VERY_LOW ) CheckpointHook


before_train_epoch: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DistSamplerSeedHook


before_train_iter: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook


after_train_iter: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


after_train_epoch: (NORMAL ) IterTimerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


before_val_epoch: (NORMAL ) IterTimerHook


before_val_iter: (NORMAL ) IterTimerHook


after_val_iter: (NORMAL ) IterTimerHook
(NORMAL ) VisualizationHook
(BELOW_NORMAL) LoggerHook


after_val_epoch: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


after_train: (VERY_LOW ) CheckpointHook


before_test_epoch: (NORMAL ) IterTimerHook


before_test_iter: (NORMAL ) IterTimerHook


after_test_iter: (NORMAL ) IterTimerHook
(NORMAL ) VisualizationHook
(BELOW_NORMAL) LoggerHook


after_test_epoch: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook


after_run: (BELOW_NORMAL) LoggerHook


08/07 12:20:55 - mmengine - INFO - load model from: mmpretrain/checkpoints/resnet50_8xb32_in1k_20210831-ea4938fc.pth 08/07 12:20:55 - mmengine - INFO - Loads checkpoint by local backend from path: mmpretrain/checkpoints/resnet50_8xb32_in1k_20210831-ea4938fc.pth 08/07 12:20:55 - mmengine - WARNING - The model and loaded state dict do not match exactly

size mismatch for head.fc.weight: copying a param with shape torch.Size([1000, 2048]) from checkpoint, the shape in current model is torch.Size([6, 2048]). size mismatch for head.fc.bias: copying a param with shape torch.Size([1000]) from checkpoint, the shape in current model is torch.Size([6]). Loads checkpoint by local backend from path: mmpretrain/checkpoints/resnet50_8xb32_in1k_20210831-ea4938fc.pth The model and loaded state dict do not match exactly

size mismatch for head.fc.weight: copying a param with shape torch.Size([1000, 2048]) from checkpoint, the shape in current model is torch.Size([6, 2048]). size mismatch for head.fc.bias: copying a param with shape torch.Size([1000]) from checkpoint, the shape in current model is torch.Size([6]). 08/07 12:20:55 - mmengine - INFO - Load checkpoint from mmpretrain/checkpoints/resnet50_8xb32_in1k_20210831-ea4938fc.pth 08/07 12:20:55 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io 08/07 12:20:55 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future. 08/07 12:20:55 - mmengine - INFO - Checkpoints will be saved to /media/panda/nvme2T/pycharm/openMMlab/mmpretrain/work_train_dir/resnet50_in1k_c6_pretrain_data. 08/07 12:20:57 - mmengine - INFO - Epoch(train) [1][ 10/994] lr: 1.0000e-01 eta: 7:09:02 time: 0.2590 data_time: 0.0298 memory: 5678 loss: 4.8993 08/07 12:20:59 - mmengine - INFO - Epoch(train) [1][ 20/994] lr: 1.0000e-01 eta: 6:07:16 time: 0.1845 data_time: 0.0005 memory: 5678 loss: 4.8828 08/07 12:21:01 - mmengine - INFO - Epoch(train) [1][ 30/994] lr: 1.0000e-01 eta: 5:47:02 time: 0.1852 data_time: 0.0006 memory: 5678 loss: 2.5028 08/07 12:21:03 - mmengine - INFO - Epoch(train) [1][ 40/994] lr: 1.0000e-01 eta: 5:36:40 time: 0.1846 data_time: 0.0004 memory: 5678 loss: 1.8539 08/07 12:21:05 - mmengine - INFO - Epoch(train) [1][ 50/994] lr: 1.0000e-01 eta: 5:30:31 time: 0.1848 data_time: 0.0004 memory: 5678 loss: 1.7719 08/07 12:21:07 - mmengine - INFO - Epoch(train) [1][ 60/994] lr: 1.0000e-01 eta: 5:26:18 time: 0.1845 data_time: 0.0005 memory: 5678 loss: 1.9383 08/07 12:21:08 - mmengine - INFO - Epoch(train) [1][ 70/994] lr: 1.0000e-01 eta: 5:23:22 time: 0.1848 data_time: 0.0004 memory: 5678 loss: 1.8294 08/07 12:21:10 - mmengine - INFO - Epoch(train) [1][ 80/994] lr: 1.0000e-01 eta: 5:21:17 
time: 0.1854 data_time: 0.0004 memory: 5678 loss: 1.7651 08/07 12:21:12 - mmengine - INFO - Epoch(train) [1][ 90/994] lr: 1.0000e-01 eta: 5:19:33 time: 0.1849 data_time: 0.0004 memory: 5678 loss: 1.7655 08/07 12:21:14 - mmengine - INFO - Epoch(train) [1][100/994] lr: 1.0000e-01 eta: 5:18:17 time: 0.1856 data_time: 0.0005 memory: 5678 loss: 1.7534 08/07 12:21:16 - mmengine - INFO - Epoch(train) [1][110/994] lr: 1.0000e-01 eta: 5:17:19 time: 0.1861 data_time: 0.0005 memory: 5678 loss: 1.8715 08/07 12:21:18 - mmengine - INFO - Epoch(train) [1][120/994] lr: 1.0000e-01 eta: 5:16:26 time: 0.1856 data_time: 0.0004 memory: 5678 loss: 1.8009 08/07 12:21:20 - mmengine - INFO - Epoch(train) [1][130/994] lr: 1.0000e-01 eta: 5:15:42 time: 0.1857 data_time: 0.0004 memory: 5678 loss: 1.7802 08/07 12:21:21 - mmengine - INFO - Epoch(train) [1][140/994] lr: 1.0000e-01 eta: 5:15:03 time: 0.1856 data_time: 0.0004 memory: 5678 loss: 1.7233 08/07 12:21:23 - mmengine - INFO - Epoch(train) [1][150/994] lr: 1.0000e-01 eta: 5:14:28 time: 0.1854 data_time: 0.0003 memory: 5678 loss: 1.7147 08/07 12:21:25 - mmengine - INFO - Epoch(train) [1][160/994] lr: 1.0000e-01 eta: 5:13:56 time: 0.1853 data_time: 0.0003 memory: 5678 loss: 1.7827 08/07 12:21:27 - mmengine - INFO - Epoch(train) [1][170/994] lr: 1.0000e-01 eta: 5:13:31 time: 0.1858 data_time: 0.0003 memory: 5678 loss: 1.8007 08/07 12:21:29 - mmengine - INFO - Epoch(train) [1][180/994] lr: 1.0000e-01 eta: 5:13:10 time: 0.1862 data_time: 0.0003 memory: 5678 loss: 1.7644 08/07 12:21:31 - mmengine - INFO - Epoch(train) [1][190/994] lr: 1.0000e-01 eta: 5:12:51 time: 0.1861 data_time: 0.0003 memory: 5678 loss: 1.7630 08/07 12:21:33 - mmengine - INFO - Epoch(train) [1][200/994] lr: 1.0000e-01 eta: 5:12:31 time: 0.1855 data_time: 0.0003 memory: 5678 loss: 1.8313 08/07 12:21:34 - mmengine - INFO - Epoch(train) [1][210/994] lr: 1.0000e-01 eta: 5:12:12 time: 0.1855 data_time: 0.0003 memory: 5678 loss: 1.7507 08/07 12:21:36 - mmengine - INFO - 
Epoch(train) [1][220/994] lr: 1.0000e-01 eta: 5:11:59 time: 0.1864 data_time: 0.0003 memory: 5678 loss: 1.7505 08/07 12:21:38 - mmengine - INFO - Epoch(train) [1][230/994] lr: 1.0000e-01 eta: 5:11:47 time: 0.1865 data_time: 0.0004 memory: 5678 loss: 1.8176 08/07 12:21:40 - mmengine - INFO - Epoch(train) [1][240/994] lr: 1.0000e-01 eta: 5:11:35 time: 0.1863 data_time: 0.0004 memory: 5678 loss: 1.7509 08/07 12:21:42 - mmengine - INFO - Epoch(train) [1][250/994] lr: 1.0000e-01 eta: 5:11:25 time: 0.1865 data_time: 0.0004 memory: 5678 loss: 1.7587 08/07 12:21:44 - mmengine - INFO - Epoch(train) [1][260/994] lr: 1.0000e-01 eta: 5:11:19 time: 0.1873 data_time: 0.0004 memory: 5678 loss: 1.6567 08/07 12:21:46 - mmengine - INFO - Epoch(train) [1][270/994] lr: 1.0000e-01 eta: 5:11:09 time: 0.1863 data_time: 0.0003 memory: 5678 loss: 1.7768 08/07 12:21:47 - mmengine - INFO - Epoch(train) [1][280/994] lr: 1.0000e-01 eta: 5:11:00 time: 0.1862 data_time: 0.0003 memory: 5678 loss: 1.6708 08/07 12:21:49 - mmengine - INFO - Epoch(train) [1][290/994] lr: 1.0000e-01 eta: 5:10:52 time: 0.1866 data_time: 0.0003 memory: 5678 loss: 1.6348 08/07 12:21:51 - mmengine - INFO - Epoch(train) [1][300/994] lr: 1.0000e-01 eta: 5:10:45 time: 0.1864 data_time: 0.0003 memory: 5678 loss: 1.7324 08/07 12:21:53 - mmengine - INFO - Epoch(train) [1][310/994] lr: 1.0000e-01 eta: 5:10:37 time: 0.1864 data_time: 0.0003 memory: 5678 loss: 1.7248 08/07 12:21:55 - mmengine - INFO - Epoch(train) [1][320/994] lr: 1.0000e-01 eta: 5:10:32 time: 0.1870 data_time: 0.0003 memory: 5678 loss: 1.6571 08/07 12:21:57 - mmengine - INFO - Epoch(train) [1][330/994] lr: 1.0000e-01 eta: 5:10:24 time: 0.1861 data_time: 0.0004 memory: 5678 loss: 1.6330 08/07 12:21:59 - mmengine - INFO - Epoch(train) [1][340/994] lr: 1.0000e-01 eta: 5:10:19 time: 0.1870 data_time: 0.0004 memory: 5678 loss: 1.6707 08/07 12:22:01 - mmengine - INFO - Epoch(train) [1][350/994] lr: 1.0000e-01 eta: 5:10:13 time: 0.1862 data_time: 0.0004 memory: 5678 
loss: 1.6293 08/07 12:22:02 - mmengine - INFO - Epoch(train) [1][360/994] lr: 1.0000e-01 eta: 5:10:07 time: 0.1867 data_time: 0.0004 memory: 5678 loss: 1.7574 08/07 12:22:04 - mmengine - INFO - Epoch(train) [1][370/994] lr: 1.0000e-01 eta: 5:10:03 time: 0.1869 data_time: 0.0004 memory: 5678 loss: 1.6525 08/07 12:22:06 - mmengine - INFO - Epoch(train) [1][380/994] lr: 1.0000e-01 eta: 5:09:57 time: 0.1863 data_time: 0.0004 memory: 5678 loss: 1.6844 08/07 12:22:08 - mmengine - INFO - Epoch(train) [1][390/994] lr: 1.0000e-01 eta: 5:09:52 time: 0.1866 data_time: 0.0004 memory: 5678 loss: 1.7426 08/07 12:22:10 - mmengine - INFO - Epoch(train) [1][400/994] lr: 1.0000e-01 eta: 5:09:47 time: 0.1866 data_time: 0.0004 memory: 5678 loss: 1.6617 08/07 12:22:12 - mmengine - INFO - Epoch(train) [1][410/994] lr: 1.0000e-01 eta: 5:09:43 time: 0.1867 data_time: 0.0004 memory: 5678 loss: 1.6957 08/07 12:22:14 - mmengine - INFO - Epoch(train) [1][420/994] lr: 1.0000e-01 eta: 5:09:37 time: 0.1860 data_time: 0.0004 memory: 5678 loss: 1.5608 08/07 12:22:15 - mmengine - INFO - Epoch(train) [1][430/994] lr: 1.0000e-01 eta: 5:09:32 time: 0.1865 data_time: 0.0004 memory: 5678 loss: 1.5062 08/07 12:22:17 - mmengine - INFO - Epoch(train) [1][440/994] lr: 1.0000e-01 eta: 5:09:27 time: 0.1862 data_time: 0.0004 memory: 5678 loss: 1.5980 08/07 12:22:19 - mmengine - INFO - Epoch(train) [1][450/994] lr: 1.0000e-01 eta: 5:09:22 time: 0.1863 data_time: 0.0004 memory: 5678 loss: 1.6076 08/07 12:22:21 - mmengine - INFO - Epoch(train) [1][460/994] lr: 1.0000e-01 eta: 5:09:17 time: 0.1861 data_time: 0.0004 memory: 5678 loss: 1.5030 08/07 12:22:23 - mmengine - INFO - Epoch(train) [1][470/994] lr: 1.0000e-01 eta: 5:09:13 time: 0.1866 data_time: 0.0004 memory: 5678 loss: 1.5996 08/07 12:22:25 - mmengine - INFO - Epoch(train) [1][480/994] lr: 1.0000e-01 eta: 5:09:09 time: 0.1866 data_time: 0.0004 memory: 5678 loss: 1.7244 08/07 12:22:27 - mmengine - INFO - Epoch(train) [1][490/994] lr: 1.0000e-01 eta: 5:09:06 
time: 0.1867 data_time: 0.0005 memory: 5678 loss: 1.6872 08/07 12:22:28 - mmengine - INFO - Epoch(train) [1][500/994] lr: 1.0000e-01 eta: 5:09:02 time: 0.1862 data_time: 0.0004 memory: 5678 loss: 1.5178 08/07 12:22:30 - mmengine - INFO - Epoch(train) [1][510/994] lr: 1.0000e-01 eta: 5:08:58 time: 0.1866 data_time: 0.0004 memory: 5678 loss: 1.4656 08/07 12:22:32 - mmengine - INFO - Epoch(train) [1][520/994] lr: 1.0000e-01 eta: 5:08:54 time: 0.1865 data_time: 0.0004 memory: 5678 loss: 1.5752 08/07 12:22:34 - mmengine - INFO - Epoch(train) [1][530/994] lr: 1.0000e-01 eta: 5:08:49 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.6735 08/07 12:22:36 - mmengine - INFO - Epoch(train) [1][540/994] lr: 1.0000e-01 eta: 5:08:44 time: 0.1855 data_time: 0.0004 memory: 5678 loss: 1.5690 08/07 12:22:38 - mmengine - INFO - Epoch(train) [1][550/994] lr: 1.0000e-01 eta: 5:08:39 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.4115 08/07 12:22:40 - mmengine - INFO - Epoch(train) [1][560/994] lr: 1.0000e-01 eta: 5:08:35 time: 0.1860 data_time: 0.0004 memory: 5678 loss: 1.5018 08/07 12:22:42 - mmengine - INFO - Epoch(train) [1][570/994] lr: 1.0000e-01 eta: 5:08:30 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.4633 08/07 12:22:43 - mmengine - INFO - Epoch(train) [1][580/994] lr: 1.0000e-01 eta: 5:08:26 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.4065 08/07 12:22:45 - mmengine - INFO - Epoch(train) [1][590/994] lr: 1.0000e-01 eta: 5:08:21 time: 0.1855 data_time: 0.0004 memory: 5678 loss: 1.3639 08/07 12:22:47 - mmengine - INFO - Epoch(train) [1][600/994] lr: 1.0000e-01 eta: 5:08:16 time: 0.1856 data_time: 0.0004 memory: 5678 loss: 1.4719 08/07 12:22:49 - mmengine - INFO - Epoch(train) [1][610/994] lr: 1.0000e-01 eta: 5:08:12 time: 0.1856 data_time: 0.0004 memory: 5678 loss: 1.4566 08/07 12:22:51 - mmengine - INFO - Epoch(train) [1][620/994] lr: 1.0000e-01 eta: 5:08:08 time: 0.1860 data_time: 0.0004 memory: 5678 loss: 1.4818 08/07 12:22:53 - mmengine - INFO - 
Epoch(train) [1][630/994] lr: 1.0000e-01 eta: 5:08:04 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.5052 08/07 12:22:54 - mmengine - INFO - Epoch(train) [1][640/994] lr: 1.0000e-01 eta: 5:07:59 time: 0.1853 data_time: 0.0004 memory: 5678 loss: 1.5845 08/07 12:22:56 - mmengine - INFO - Epoch(train) [1][650/994] lr: 1.0000e-01 eta: 5:07:56 time: 0.1859 data_time: 0.0004 memory: 5678 loss: 1.5109 08/07 12:22:58 - mmengine - INFO - Epoch(train) [1][660/994] lr: 1.0000e-01 eta: 5:07:52 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.5308 08/07 12:23:00 - mmengine - INFO - Epoch(train) [1][670/994] lr: 1.0000e-01 eta: 5:07:48 time: 0.1860 data_time: 0.0004 memory: 5678 loss: 1.6288 08/07 12:23:02 - mmengine - INFO - Epoch(train) [1][680/994] lr: 1.0000e-01 eta: 5:07:44 time: 0.1857 data_time: 0.0004 memory: 5678 loss: 1.6948 08/07 12:23:04 - mmengine - INFO - Epoch(train) [1][690/994] lr: 1.0000e-01 eta: 5:07:41 time: 0.1861 data_time: 0.0004 memory: 5678 loss: 1.2794 08/07 12:23:06 - mmengine - INFO - Epoch(train) [1][700/994] lr: 1.0000e-01 eta: 5:07:37 time: 0.1857 data_time: 0.0004 memory: 5678 loss: 1.4354 08/07 12:23:08 - mmengine - INFO - Epoch(train) [1][710/994] lr: 1.0000e-01 eta: 5:07:34 time: 0.1859 data_time: 0.0004 memory: 5678 loss: 1.4808 08/07 12:23:09 - mmengine - INFO - Epoch(train) [1][720/994] lr: 1.0000e-01 eta: 5:07:30 time: 0.1857 data_time: 0.0004 memory: 5678 loss: 1.4884 08/07 12:23:11 - mmengine - INFO - Epoch(train) [1][730/994] lr: 1.0000e-01 eta: 5:07:27 time: 0.1860 data_time: 0.0004 memory: 5678 loss: 1.3789 08/07 12:23:13 - mmengine - INFO - Epoch(train) [1][740/994] lr: 1.0000e-01 eta: 5:07:24 time: 0.1861 data_time: 0.0004 memory: 5678 loss: 1.5289 08/07 12:23:15 - mmengine - INFO - Epoch(train) [1][750/994] lr: 1.0000e-01 eta: 5:07:21 time: 0.1857 data_time: 0.0004 memory: 5678 loss: 1.4697 08/07 12:23:17 - mmengine - INFO - Epoch(train) [1][760/994] lr: 1.0000e-01 eta: 5:07:18 time: 0.1865 data_time: 0.0005 memory: 5678 
loss: 1.3652 08/07 12:23:19 - mmengine - INFO - Epoch(train) [1][770/994] lr: 1.0000e-01 eta: 5:07:15 time: 0.1859 data_time: 0.0004 memory: 5678 loss: 1.4149 08/07 12:23:21 - mmengine - INFO - Epoch(train) [1][780/994] lr: 1.0000e-01 eta: 5:07:12 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.3436 08/07 12:23:22 - mmengine - INFO - Epoch(train) [1][790/994] lr: 1.0000e-01 eta: 5:07:09 time: 0.1861 data_time: 0.0004 memory: 5678 loss: 1.4388 08/07 12:23:24 - mmengine - INFO - Epoch(train) [1][800/994] lr: 1.0000e-01 eta: 5:07:06 time: 0.1862 data_time: 0.0004 memory: 5678 loss: 1.3684 08/07 12:23:26 - mmengine - INFO - Epoch(train) [1][810/994] lr: 1.0000e-01 eta: 5:07:03 time: 0.1859 data_time: 0.0004 memory: 5678 loss: 1.5049 08/07 12:23:28 - mmengine - INFO - Epoch(train) [1][820/994] lr: 1.0000e-01 eta: 5:07:00 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.6020 08/07 12:23:30 - mmengine - INFO - Epoch(train) [1][830/994] lr: 1.0000e-01 eta: 5:06:57 time: 0.1859 data_time: 0.0004 memory: 5678 loss: 1.4947 08/07 12:23:32 - mmengine - INFO - Epoch(train) [1][840/994] lr: 1.0000e-01 eta: 5:06:54 time: 0.1857 data_time: 0.0004 memory: 5678 loss: 1.4567 08/07 12:23:34 - mmengine - INFO - Epoch(train) [1][850/994] lr: 1.0000e-01 eta: 5:06:50 time: 0.1856 data_time: 0.0004 memory: 5678 loss: 1.2484 08/07 12:23:35 - mmengine - INFO - Epoch(train) [1][860/994] lr: 1.0000e-01 eta: 5:06:48 time: 0.1859 data_time: 0.0004 memory: 5678 loss: 1.4109 08/07 12:23:37 - mmengine - INFO - Epoch(train) [1][870/994] lr: 1.0000e-01 eta: 5:06:45 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.3896 08/07 12:23:39 - mmengine - INFO - Epoch(train) [1][880/994] lr: 1.0000e-01 eta: 5:06:42 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.4797 08/07 12:23:41 - mmengine - INFO - Epoch(train) [1][890/994] lr: 1.0000e-01 eta: 5:06:38 time: 0.1857 data_time: 0.0004 memory: 5678 loss: 1.3543 08/07 12:23:43 - mmengine - INFO - Epoch(train) [1][900/994] lr: 1.0000e-01 eta: 5:06:36 
time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.2263 08/07 12:23:45 - mmengine - INFO - Epoch(train) [1][910/994] lr: 1.0000e-01 eta: 5:06:33 time: 0.1860 data_time: 0.0004 memory: 5678 loss: 1.2330 08/07 12:23:47 - mmengine - INFO - Epoch(train) [1][920/994] lr: 1.0000e-01 eta: 5:06:30 time: 0.1856 data_time: 0.0004 memory: 5678 loss: 1.2836 08/07 12:23:48 - mmengine - INFO - Epoch(train) [1][930/994] lr: 1.0000e-01 eta: 5:06:27 time: 0.1857 data_time: 0.0004 memory: 5678 loss: 1.3772 08/07 12:23:50 - mmengine - INFO - Epoch(train) [1][940/994] lr: 1.0000e-01 eta: 5:06:24 time: 0.1863 data_time: 0.0004 memory: 5678 loss: 1.5221 08/07 12:23:52 - mmengine - INFO - Epoch(train) [1][950/994] lr: 1.0000e-01 eta: 5:06:22 time: 0.1858 data_time: 0.0004 memory: 5678 loss: 1.5013 08/07 12:23:54 - mmengine - INFO - Epoch(train) [1][960/994] lr: 1.0000e-01 eta: 5:06:19 time: 0.1856 data_time: 0.0004 memory: 5678 loss: 1.3082 08/07 12:23:56 - mmengine - INFO - Epoch(train) [1][970/994] lr: 1.0000e-01 eta: 5:06:16 time: 0.1860 data_time: 0.0004 memory: 5678 loss: 1.3787 08/07 12:23:58 - mmengine - INFO - Epoch(train) [1][980/994] lr: 1.0000e-01 eta: 5:06:13 time: 0.1853 data_time: 0.0004 memory: 5678 loss: 1.2518 08/07 12:24:00 - mmengine - INFO - Epoch(train) [1][990/994] lr: 1.0000e-01 eta: 5:06:10 time: 0.1859 data_time: 0.0003 memory: 5678 loss: 1.3192 08/07 12:24:00 - mmengine - INFO - Exp name: resnet50_8xb32_in1k_c6_20230807_122051 08/07 12:24:00 - mmengine - INFO - Saving checkpoint at 1 epochs Traceback (most recent call last): File "mmpretrain/tools/train.py", line 166, in main() File "mmpretrain/tools/train.py", line 162, in main runner.train() File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1735, in train model = self.train_loop.run() # type: ignore File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/mmengine/runner/loops.py", line 102, in run self.runner.val_loop.run() File 
"/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/mmengine/runner/loops.py", line 362, in run for idx, data_batch in enumerate(self.dataloader): File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 521, in next data = self._next_data() File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1203, in _next_data return self._process_data(data) File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1229, in _process_data data.reraise() File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/torch/_utils.py", line 434, in reraise raise exception RuntimeError: Caught RuntimeError in DataLoader worker process 0. Original Traceback (most recent call last): File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop data = fetcher.fetch(index) File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch return self.collate_fn(data) File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/mmengine/dataset/utils.py", line 160, in default_collate return data_item_type({ File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/mmengine/dataset/utils.py", line 161, in key: default_collate([d[key] for d in data_batch]) File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/mmengine/dataset/utils.py", line 165, in default_collate return torch_default_collate(data_batch) File "/home/panda/anaconda3/envs/torch/lib/python3.8/site-packages/torch/utils/data/_utils/collate.py", line 56, in default_collate return torch.stack(batch, 0, out=out) RuntimeError: stack expects each tensor to be equal size, but got [3, 121, 100] at entry 0 and [3, 66, 100] at entry 1

baiguosummer commented 1 year ago

Following this setting, the code runs into an error.