Open mrFocusXin opened 1 year ago
You need to write dataset_type = 'mmcls.CIFAR10'
to use CIFAR10
@fangyixiao18 Thank you for your reply! I have now changed my config file to use dataset_type = 'mmcls.CIFAR10', as follows:
_base_ = [ '../_base_/models/odc.py',
'../_base_/schedules/sgd_steplr-200e_in1k.py',
'../_base_/default_runtime.py',
] #
dataset_type = 'mmcls.CIFAR10' data_root = '/home/wangxin/cifar' file_client_args = dict(backend='disk')
train_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ]
extract_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict(type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), dict(type='PackSelfSupInputs', meta_keys=['img_path']) ]
train_dataloader = dict(
batch_size=64,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DeepClusterSampler', shuffle=True, replace=True),
collate_fn=dict(type='default_collate'),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='',
test_mode=True,
data_prefix='/home/wangxin/cifar',
pipeline=train_pipeline
))
num_classes = 10000 custom_hooks = [ dict( type='DeepClusterHook', extract_dataloader=dict( batch_size=128, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=False, round_up=True), collate_fn=dict(type='default_collate'), dataset=dict(
data_root=data_root,
test_mode=True,
data_prefix='/home/wangxin/cifar',
type=dataset_type,
# pipeline=extract_pipeline
)),
clustering=dict(type='Kmeans', k=num_classes, pca_dim=-1), # no pca
unif_sampling=False,
reweight=True,
reweight_pow=0.5,
init_memory=True,
initial=True, # call initially
interval=9999999999), # initial only
dict(
type='ODCHook',
centroids_update_interval=10, # iter
deal_with_small_clusters_interval=1,
evaluate_interval=50,
reweight=True,
reweight_pow=0.5)
]
#
model = dict( head=dict(num_classes=num_classes), memory_bank=dict(num_classes=num_classes), )
optimizer = dict(type='SGD', lr=0.06, weight_decay=1e-5, momentum=0.9) optim_wrapper = dict( type='OptimWrapper', optimizer=optimizer, paramwise_cfg=dict(custom_keys={'head': dict(momentum=0.)}))
param_scheduler = [ dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4) ]
train_cfg = dict( max_epochs=440)
default_hooks = dict( checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3))
Then the following error occurs:
System environment: sys.platform: linux Python: 3.8.15 (default, Nov 11 2022, 14:08:18) [GCC 11.2.0] CUDA available: False numpy_random_seed: 1330821374 GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 PyTorch: 1.13.1 PyTorch compiling details: PyTorch built with:
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=0, USE_CUDNN=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
TorchVision: 0.14.1 OpenCV: 4.7.0 MMEngine: 0.5.0
02/21 08:06:24 - mmengine - INFO - Config: model = dict( type='ODC', data_preprocessor=dict( mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), bgr_to_rgb=True), backbone=dict( type='ResNet', depth=50, in_channels=3, out_indices=[4], norm_cfg=dict(type='SyncBN')), neck=dict( type='ODCNeck', in_channels=2048, hid_channels=512, out_channels=256, with_avg_pool=True), head=dict( type='ClsHead', loss=dict(type='mmcls.CrossEntropyLoss'), with_avg_pool=False, in_channels=256, num_classes=10000), memory_bank=dict( type='ODCMemory', length=1281167, feat_dim=256, momentum=0.5, num_classes=10000, min_cluster=20, debug=False)) optimizer = dict(type='SGD', lr=0.06, weight_decay=1e-05, momentum=0.9) optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='SGD', lr=0.06, weight_decay=1e-05, momentum=0.9), paramwise_cfg=dict(custom_keys=dict(head=dict(momentum=0.0)))) param_scheduler = [ dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4) ] train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=440) default_scope = 'mmselfsup' default_hooks = dict( runtime_info=dict(type='RuntimeInfoHook'), timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook')) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl')) log_processor = dict( window_size=10, custom_cfg=[dict(data_src='', method='mean', window_size='global')]) vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='SelfSupVisualizer', vis_backends=[dict(type='LocalVisBackend')], name='visualizer') log_level = 'INFO' load_from = None resume = False dataset_type = 'mmcls.CIFAR10' data_root = '/home/wangxin/cifar' file_client_args = dict(backend='disk') train_pipeline = [ dict(type='LoadImageFromFile', 
file_client_args=dict(backend='disk')), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ] extract_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), dict(type='PackSelfSupInputs', meta_keys=['img_path']) ] train_dataloader = dict( batch_size=64, num_workers=4, persistent_workers=True, sampler=dict(type='DeepClusterSampler', shuffle=True, replace=True), collate_fn=dict(type='default_collate'), dataset=dict( type='mmcls.CIFAR10', data_root='/home/wangxin/cifar', ann_file='', test_mode=True, data_prefix='/home/wangxin/cifar', pipeline=[ dict( type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ])) num_classes = 10000 custom_hooks = [ dict( type='DeepClusterHook', extract_dataloader=dict( batch_size=128, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=False, round_up=True), collate_fn=dict(type='default_collate'), dataset=dict( data_root='/home/wangxin/cifar', test_mode=True, data_prefix='/home/wangxin/cifar', type='mmcls.CIFAR10')), clustering=dict(type='Kmeans', k=10000, pca_dim=-1), unif_sampling=False, reweight=True, 
reweight_pow=0.5, init_memory=True, initial=True, interval=9999999999), dict( type='ODCHook', centroids_update_interval=10, deal_with_small_clusters_interval=1, evaluate_interval=50, reweight=True, reweight_pow=0.5) ] launcher = 'none' work_dir = './work_dirs/selfsup/odc_resnet50_8xb64-steplr-440e_cfiar10'
02/21 08:06:24 - mmengine - WARNING - The "visualizer" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 08:06:24 - mmengine - WARNING - The "vis_backend" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 08:06:24 - mmengine - WARNING - The "model" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 08:06:24 - mmengine - WARNING - The "model" registry in mmcls did not set import location. Fallback to call mmcls.utils.register_all_modules
instead.
02/21 08:06:25 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
02/21 08:06:25 - mmengine - WARNING - The "hook" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 08:06:25 - mmengine - WARNING - The "dataset" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 08:06:25 - mmengine - WARNING - The "dataset" registry in mmcls did not set import location. Fallback to call mmcls.utils.register_all_modules
instead.
02/21 08:06:26 - mmengine - WARNING - The "data sampler" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 08:06:26 - mmengine - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH ) RuntimeInfoHook
(BELOW_NORMAL) LoggerHook
before_train:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DeepClusterHook
(VERY_LOW ) CheckpointHook
before_train_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DistSamplerSeedHook
before_train_iter:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
after_train_iter:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) ODCHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
after_train_epoch:
(NORMAL ) IterTimerHook
(NORMAL ) DeepClusterHook
(NORMAL ) ODCHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
before_val_epoch: (NORMAL ) IterTimerHook
before_val_iter: (NORMAL ) IterTimerHook
after_val_iter:
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
after_val_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
before_test_epoch: (NORMAL ) IterTimerHook
before_test_iter: (NORMAL ) IterTimerHook
after_test_iter:
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
after_test_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
after_run: (BELOW_NORMAL) LoggerHook
02/21 08:06:26 - mmengine - WARNING - The "loop" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
Traceback (most recent call last):
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmcls/datasets/cifar.py", line 66, in __init__
super().__init__(
TypeError: __init__() got multiple values for keyword argument 'ann_file'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/runner/loops.py", line 43, in __init__
super().__init__(runner, dataloader)
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/runner/base_loop.py", line 26, in __init__
self.dataloader = runner.build_dataloader(
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1333, in build_dataloader
dataset = DATASETS.build(dataset_cfg)
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/registry/registry.py", line 521, in build
return self.build_func(cfg, args, **kwargs, registry=self)
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 135, in build_from_cfg
raise type(e)(
TypeError: class `CIFAR10` in mmcls/datasets/cifar.py: __init__() got multiple values for keyword argument 'ann_file'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "tools/train.py", line 99, in EpochBasedTrainLoop
in mmengine/runner/loops.py: class CIFAR10
in mmcls/datasets/cifar.py: init() got multiple values for keyword argument 'ann_file'
I didn't set anything related to EpochBasedTrainLoop in my config file. The error seems to suggest that I assigned 'ann_file' more than once, but I only set ann_file=''. When I checked 'mmcls/datasets/cifar.py', I found the note 'The CIFAR dataset doesn't need specify annotation file'. How can I solve this? Thanks for your help!
Hi @fangyixiao18, I have fixed the problem above, but now there is a new one, as follows:
System environment: sys.platform: linux Python: 3.8.15 (default, Nov 11 2022, 14:08:18) [GCC 11.2.0] CUDA available: False numpy_random_seed: 1612958675 GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 PyTorch: 1.13.1 PyTorch compiling details: PyTorch built with:
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=0, USE_CUDNN=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
TorchVision: 0.14.1 OpenCV: 4.7.0 MMEngine: 0.5.0
02/21 14:13:43 - mmengine - INFO - Config: model = dict( type='ODC', data_preprocessor=dict( mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), bgr_to_rgb=True), backbone=dict( type='ResNet', depth=50, in_channels=3, out_indices=[4], norm_cfg=dict(type='SyncBN')), neck=dict( type='ODCNeck', in_channels=2048, hid_channels=512, out_channels=256, with_avg_pool=True), head=dict( type='ClsHead', loss=dict(type='mmcls.CrossEntropyLoss'), with_avg_pool=False, in_channels=256, num_classes=10000), memory_bank=dict( type='ODCMemory', length=1281167, feat_dim=256, momentum=0.5, num_classes=10000, min_cluster=20, debug=False)) optimizer = dict(type='SGD', lr=0.06, weight_decay=1e-05, momentum=0.9) optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='SGD', lr=0.06, weight_decay=1e-05, momentum=0.9), paramwise_cfg=dict(custom_keys=dict(head=dict(momentum=0.0)))) param_scheduler = [ dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4) ] train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=440) default_scope = 'mmselfsup' default_hooks = dict( runtime_info=dict(type='RuntimeInfoHook'), timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook')) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl')) log_processor = dict( window_size=10, custom_cfg=[dict(data_src='', method='mean', window_size='global')]) vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='SelfSupVisualizer', vis_backends=[dict(type='LocalVisBackend')], name='visualizer') log_level = 'INFO' load_from = None resume = False dataset_type = 'mmcls.CIFAR10' data_root = '/home/wangxin/cifar' file_client_args = dict(backend='disk') train_pipeline = [ dict(type='LoadImageFromFile', 
file_client_args=dict(backend='disk')), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ] extract_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), dict(type='PackSelfSupInputs', meta_keys=['img_path']) ] train_dataloader = dict( batch_size=64, num_workers=4, persistent_workers=True, sampler=dict(type='DeepClusterSampler', shuffle=True, replace=True), collate_fn=dict(type='default_collate'), dataset=dict( type='mmcls.CIFAR10', data_root='/home/wangxin/cifar', ann_file='', test_mode=True, data_prefix='/home/wangxin/cifar', pipeline=[ dict( type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ])) num_classes = 10000 custom_hooks = [ dict( type='DeepClusterHook', extract_dataloader=dict( batch_size=128, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=False, round_up=True), collate_fn=dict(type='default_collate'), dataset=dict( ann_file='', data_root='/home/wangxin/cifar', test_mode=True, data_prefix='/home/wangxin/cifar', type='mmcls.CIFAR10')), clustering=dict(type='Kmeans', k=10000, pca_dim=-1), unif_sampling=False, 
reweight=True, reweight_pow=0.5, init_memory=True, initial=True, interval=9999999999), dict( type='ODCHook', centroids_update_interval=10, deal_with_small_clusters_interval=1, evaluate_interval=50, reweight=True, reweight_pow=0.5) ] launcher = 'none' work_dir = './work_dirs/home/odc_resnet50_8xb64-steplr-440e_cfiar10'
02/21 14:13:43 - mmengine - WARNING - The "visualizer" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:13:43 - mmengine - WARNING - The "vis_backend" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:13:44 - mmengine - WARNING - The "model" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:14:22 - mmengine - WARNING - The "model" registry in mmcls did not set import location. Fallback to call mmcls.utils.register_all_modules
instead.
02/21 14:14:23 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.
02/21 14:14:23 - mmengine - WARNING - The "hook" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:14:23 - mmengine - WARNING - The "dataset" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:14:23 - mmengine - WARNING - The "dataset" registry in mmcls did not set import location. Fallback to call mmcls.utils.register_all_modules
instead.
02/21 14:14:24 - mmengine - WARNING - The "data sampler" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:14:24 - mmengine - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH ) RuntimeInfoHook
(BELOW_NORMAL) LoggerHook
before_train:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DeepClusterHook
(VERY_LOW ) CheckpointHook
before_train_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DistSamplerSeedHook
before_train_iter:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
after_train_iter:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) ODCHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
after_train_epoch:
(NORMAL ) IterTimerHook
(NORMAL ) DeepClusterHook
(NORMAL ) ODCHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
before_val_epoch: (NORMAL ) IterTimerHook
before_val_iter: (NORMAL ) IterTimerHook
after_val_iter:
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
after_val_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
before_test_epoch: (NORMAL ) IterTimerHook
before_test_iter: (NORMAL ) IterTimerHook
after_test_iter:
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
after_test_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
after_run: (BELOW_NORMAL) LoggerHook
02/21 14:14:29 - mmengine - WARNING - The "loop" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:14:29 - mmengine - WARNING - The "transform" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:14:30 - mmengine - WARNING - The "optimizer wrapper constructor" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:17:50 - mmengine - INFO - paramwise_options -- head.fc_cls.weight:lr=0.06
02/21 14:17:50 - mmengine - INFO - paramwise_options -- head.fc_cls.weight:weight_decay=1e-05
02/21 14:17:50 - mmengine - INFO - paramwise_options -- head.fc_cls.weight:momentum=0.0
02/21 14:17:50 - mmengine - INFO - paramwise_options -- head.fc_cls.bias:lr=0.06
02/21 14:17:50 - mmengine - INFO - paramwise_options -- head.fc_cls.bias:weight_decay=1e-05
02/21 14:17:50 - mmengine - INFO - paramwise_options -- head.fc_cls.bias:momentum=0.0
02/21 14:17:55 - mmengine - WARNING - The "optimizer" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:17:55 - mmengine - WARNING - The "optimizer_wrapper" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:18:09 - mmengine - WARNING - The "parameter scheduler" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
02/21 14:18:26 - mmengine - WARNING - The "weight initializer" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules
instead.
Traceback (most recent call last):
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/wangxin/.vscode-server/extensions/ms-python.python-2023.3.10401217/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/main.py", line 39, in
Do I need to set or ignore this 'module' property? I don't have any ideas. Do you know how to fix it? Thanks for your help!
Did you use 1 GPU to train?
Please forgive me for not using CUDA. It works when I use my own dataset in ImageNet-1k style.
Please forgive me for not using CUDA. It works when I use my own dataset in ImageNet-1k style.
The `module` will be built with distributed training. If you run without CUDA, you could run another algorithm instead. Currently, ODC and DeepCluster don't support non-GPU training. Sorry for the inconvenience.
Please forgive me for not using cuda,. It works when i use my dataset with ImageNet1k-style.
The `module` will be built with distributed training. If you run without CUDA, you could run another algorithm instead. Currently, ODC and DeepCluster don't support non-GPU training. Sorry for the inconvenience.
Thanks for your reply! When I run ODC with 1 GPU, I get this error: "AttributeError: 'ODC' object has no attribute 'module'"
And when I run ODC with 3 GPUs, I get the following error log:
/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/torch/distributed/launch.py:178: FutureWarning: The module torch.distributed.launch is deprecated
and will be removed in future. Use torchrun.
Note that --use_env is set by default in torchrun.
If your script expects `--local_rank` argument to be set, please
change it to read from `os.environ['LOCAL_RANK']` instead. See
https://pytorch.org/docs/stable/distributed.html#launch-utility for
further instructions
warnings.warn(
WARNING:torch.distributed.run:
*****************************************
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
*****************************************
/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/utils/dl_utils/setup_env.py:56: UserWarning: Setting MKL_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
warnings.warn(
/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/utils/dl_utils/setup_env.py:56: UserWarning: Setting MKL_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
warnings.warn(
/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/utils/dl_utils/setup_env.py:56: UserWarning: Setting MKL_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
warnings.warn(
02/25 09:43:11 - mmengine - INFO -
------------------------------------------------------------
System environment:
sys.platform: linux
Python: 3.8.15 (default, Nov 11 2022, 14:08:18) [GCC 11.2.0]
CUDA available: True
numpy_random_seed: 361524315
GPU 0,1,2,3: Tesla V100-SXM2-32GB
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 11.7, V11.7.64
GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
PyTorch: 1.10.0+cu111
PyTorch compiling details: PyTorch built with:
- GCC 7.3
- C++ Version: 201402
- Intel(R) oneAPI Math Kernel Library Version 2023.0-Product Build 20221128 for Intel(R) 64 architecture applications
- Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- LAPACK is enabled (usually provided by MKL)
- NNPACK is enabled
- CPU capability usage: AVX512
- CUDA Runtime 11.1
- NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86
- CuDNN 8.0.5
- Magma 2.5.2
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.10.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON,
TorchVision: 0.11.0+cu111
OpenCV: 4.7.0
MMEngine: 0.5.0
Runtime environment:
cudnn_benchmark: False
mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}
dist_cfg: {'backend': 'nccl'}
seed: None
Distributed launcher: pytorch
Distributed training: True
GPU number: 3
------------------------------------------------------------
02/25 09:43:11 - mmengine - INFO - Config:
model = dict(
type='ODC',
data_preprocessor=dict(
mean=(123.675, 116.28, 103.53),
std=(58.395, 57.12, 57.375),
bgr_to_rgb=True),
backbone=dict(
type='ResNet',
depth=50,
in_channels=3,
out_indices=[4],
norm_cfg=dict(type='SyncBN')),
neck=dict(
type='ODCNeck',
in_channels=2048,
hid_channels=512,
out_channels=256,
with_avg_pool=True),
head=dict(
type='ClsHead',
loss=dict(type='mmcls.CrossEntropyLoss'),
with_avg_pool=False,
in_channels=256,
num_classes=10000),
memory_bank=dict(
type='ODCMemory',
length=1281167,
feat_dim=256,
momentum=0.5,
num_classes=10000,
min_cluster=20,
debug=False))
dataset_type = 'DeepClusterImageNet'
data_root = '/home/wangxin/mmselfsup_1.x/data/imagenet'
file_client_args = dict(backend='disk')
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(type='RandomResizedCrop', size=224, backend='pillow'),
dict(type='RandomFlip', prob=0.5),
dict(type='RandomRotation', degrees=2),
dict(
type='ColorJitter',
brightness=0.4,
contrast=0.4,
saturation=1.0,
hue=0.5),
dict(
type='RandomGrayscale',
prob=0.2,
keep_channels=True,
channel_weights=(0.114, 0.587, 0.2989)),
dict(
type='PackSelfSupInputs',
algorithm_keys=['sample_idx'],
meta_keys=['img_path'])
]
extract_pipeline = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackSelfSupInputs', meta_keys=['img_path'])
]
train_dataloader = dict(
batch_size=64,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DeepClusterSampler', shuffle=True, replace=True),
collate_fn=dict(type='default_collate'),
dataset=dict(
type='DeepClusterImageNet',
data_root='/home/wangxin/mmselfsup_1.x/data/imagenet',
ann_file='meta/train.txt',
data_prefix=dict(img_path='train/'),
pipeline=[
dict(
type='LoadImageFromFile',
file_client_args=dict(backend='disk')),
dict(type='RandomResizedCrop', size=224, backend='pillow'),
dict(type='RandomFlip', prob=0.5),
dict(type='RandomRotation', degrees=2),
dict(
type='ColorJitter',
brightness=0.4,
contrast=0.4,
saturation=1.0,
hue=0.5),
dict(
type='RandomGrayscale',
prob=0.2,
keep_channels=True,
channel_weights=(0.114, 0.587, 0.2989)),
dict(
type='PackSelfSupInputs',
algorithm_keys=['sample_idx'],
meta_keys=['img_path'])
]))
num_classes = 10000
custom_hooks = [
dict(
type='DeepClusterHook',
extract_dataloader=dict(
batch_size=128,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=True),
collate_fn=dict(type='default_collate'),
dataset=dict(
type='DeepClusterImageNet',
data_root='/home/wangxin/mmselfsup_1.x/data/imagenet',
ann_file='meta/train.txt',
data_prefix=dict(img_path='train/'),
pipeline=[
dict(
type='LoadImageFromFile',
file_client_args=dict(backend='disk')),
dict(
type='mmcls.ResizeEdge',
scale=256,
edge='short',
backend='pillow'),
dict(type='CenterCrop', crop_size=224),
dict(type='PackSelfSupInputs', meta_keys=['img_path'])
])),
clustering=dict(type='Kmeans', k=10000, pca_dim=-1),
unif_sampling=False,
reweight=True,
reweight_pow=0.5,
init_memory=True,
initial=True,
interval=9999999999),
dict(
type='ODCHook',
centroids_update_interval=10,
deal_with_small_clusters_interval=1,
evaluate_interval=50,
reweight=True,
reweight_pow=0.5)
]
optimizer = dict(type='SGD', lr=0.06, weight_decay=1e-05, momentum=0.9)
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='SGD', lr=0.06, weight_decay=1e-05, momentum=0.9),
paramwise_cfg=dict(custom_keys=dict(head=dict(momentum=0.0))))
param_scheduler = [
dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4)
]
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=440)
default_scope = 'mmselfsup'
default_hooks = dict(
runtime_info=dict(type='RuntimeInfoHook'),
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=50),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3),
sampler_seed=dict(type='DistSamplerSeedHook'))
env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'))
log_processor = dict(
window_size=10,
custom_cfg=[dict(data_src='', method='mean', window_size='global')])
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='SelfSupVisualizer',
vis_backends=[dict(type='LocalVisBackend')],
name='visualizer')
log_level = 'INFO'
load_from = None
resume = False
launcher = 'pytorch'
work_dir = './work_dirs/selfsup/odc_resnet50_8xb64-steplr-440e_in1k'
02/25 09:43:11 - mmengine - WARNING - The "visualizer" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:11 - mmengine - WARNING - The "vis_backend" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:11 - mmengine - WARNING - The "model" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:11 - mmengine - WARNING - The "model" registry in mmcls did not set import location. Fallback to call `mmcls.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "hook" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "dataset" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "transform" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "transform" registry in mmcls did not set import location. Fallback to call `mmcls.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "data sampler" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH ) RuntimeInfoHook
(BELOW_NORMAL) LoggerHook
--------------------
before_train:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DeepClusterHook
(VERY_LOW ) CheckpointHook
--------------------
before_train_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DistSamplerSeedHook
--------------------
before_train_iter:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
--------------------
after_train_iter:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) ODCHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
--------------------
after_train_epoch:
(NORMAL ) IterTimerHook
(NORMAL ) DeepClusterHook
(NORMAL ) ODCHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
--------------------
before_val_epoch:
(NORMAL ) IterTimerHook
--------------------
before_val_iter:
(NORMAL ) IterTimerHook
--------------------
after_val_iter:
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
--------------------
after_val_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook
--------------------
before_test_epoch:
(NORMAL ) IterTimerHook
--------------------
before_test_iter:
(NORMAL ) IterTimerHook
--------------------
after_test_iter:
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
--------------------
after_test_epoch:
(VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
--------------------
after_run:
(BELOW_NORMAL) LoggerHook
--------------------
02/25 09:43:12 - mmengine - WARNING - The "loop" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "optimizer wrapper constructor" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - INFO - paramwise_options -- head.fc_cls.weight:lr=0.06
02/25 09:43:12 - mmengine - INFO - paramwise_options -- head.fc_cls.weight:weight_decay=1e-05
02/25 09:43:12 - mmengine - INFO - paramwise_options -- head.fc_cls.weight:momentum=0.0
02/25 09:43:12 - mmengine - INFO - paramwise_options -- head.fc_cls.bias:lr=0.06
02/25 09:43:12 - mmengine - INFO - paramwise_options -- head.fc_cls.bias:weight_decay=1e-05
02/25 09:43:12 - mmengine - INFO - paramwise_options -- head.fc_cls.bias:momentum=0.0
02/25 09:43:12 - mmengine - WARNING - The "optimizer" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "optimizer_wrapper" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "parameter scheduler" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
02/25 09:43:12 - mmengine - WARNING - The "weight initializer" registry in mmselfsup did not set import location. Fallback to call `mmselfsup.utils.register_all_modules` instead.
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 4/4, 0.2 task/s, elapsed: 18s, ETA: 0sTraceback (most recent call last):
File "tools/train.py", line 99, in <module>
main()
File "tools/train.py", line 95, in main
runner.train()
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1686, in train
model = self.train_loop.run() # type: ignore
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/runner/loops.py", line 87, in run
self.runner.call_hook('before_train')
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1748, in call_hook
getattr(hook, fn_name)(self, **kwargs)
File "/home/wangxin/mmselfsup_test/mmselfsup/mmselfsup/engine/hooks/deepcluster_hook.py", line 65, in before_train
self.deepcluster(runner)
File "/home/wangxin/mmselfsup_test/mmselfsup/mmselfsup/engine/hooks/deepcluster_hook.py", line 86, in deepcluster
clustering_algo.cluster(features, verbose=True)
File "/home/wangxin/mmselfsup_test/mmselfsup/mmselfsup/utils/clustering.py", line 156, in cluster
I, loss = run_kmeans(xb, self.k, verbose)
File "/home/wangxin/mmselfsup_test/mmselfsup/mmselfsup/utils/clustering.py", line 101, in run_kmeans
clus = faiss.Clustering(d, nmb_clusters)
AttributeError: 'NoneType' object has no attribute 'Clustering'
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 478798 closing signal SIGTERM
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 478799 closing signal SIGTERM
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 478797) of binary: /home/wangxin/anaconda3/envs/mmselfsup/bin/python
Traceback (most recent call last):
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/torch/distributed/launch.py", line 193, in <module>
main()
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/torch/distributed/launch.py", line 189, in main
launch(args)
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/torch/distributed/launch.py", line 174, in launch
run(args)
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/torch/distributed/run.py", line 710, in run
elastic_launch(
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 131, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/wangxin/anaconda3/envs/mmselfsup/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 259, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
============================================================
tools/train.py FAILED
------------------------------------------------------------
Failures:
<NO_OTHER_FAILURES>
------------------------------------------------------------
Root Cause (first observed failure):
[0]:
time : 2023-02-25_09:43:34
host : iar-server-7
rank : 0 (local_rank: 0)
exitcode : 1 (pid: 478797)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
Is there something wrong with my setup? Your reply will be very helpful to me!
You need to install the `faiss` package: https://github.com/facebookresearch/faiss
I want to use CIFAR10 in ODC, and my config file is as follows:
_base_ = [ '../_base_/models/odc.py',
'../_base_/datasets/imagenet_odc.py',
]
Changed Start
dataset settings
dataset_type = 'CIFAR10' data_root = '/home/wangxin/cifar/' file_client_args = dict(backend='disk')
train_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ]
extract_pipeline = [ dict(type='LoadImageFromFile', file_client_args=file_client_args), dict(type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), dict(type='PackSelfSupInputs', meta_keys=['img_path']) ]
train_dataloader = dict( batch_size=64, num_workers=4, persistent_workers=True, sampler=dict(type='DeepClusterSampler', shuffle=True, replace=True), collate_fn=dict(type='default_collate'), dataset=dict( type=dataset_type, data_root=data_root, ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=train_pipeline))
num_classes = 10000 custom_hooks = [ dict( type='DeepClusterHook', extract_dataloader=dict( batch_size=128, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=False, round_up=True), collate_fn=dict(type='default_collate'), dataset=dict( type=dataset_type, data_root=data_root, ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=extract_pipeline)), clustering=dict(type='Kmeans', k=num_classes, pca_dim=-1), # no pca unif_sampling=False, reweight=True, reweight_pow=0.5, init_memory=True, initial=True, # call initially interval=9999999999), # initial only dict( type='ODCHook', centroids_update_interval=10, # iter deal_with_small_clusters_interval=1, evaluate_interval=50, reweight=True, reweight_pow=0.5) ]
Changed End
model settings
model = dict(
head=dict(num_classes=num_classes), memory_bank=dict(num_classes=num_classes), )
optimizer
optimizer = dict(type='SGD', lr=0.06, weight_decay=1e-5, momentum=0.9) optim_wrapper = dict( type='OptimWrapper', optimizer=optimizer, paramwise_cfg=dict(custom_keys={'head': dict(momentum=0.)}))
learning rate scheduler
param_scheduler = [ dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4) ]
runtime settings
train_cfg = dict(max_epochs=440) default_hooks = dict( checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3))
Then the following error occurs:
System environment: sys.platform: linux Python: 3.8.15 (default, Nov 11 2022, 14:08:18) [GCC 11.2.0] CUDA available: False numpy_random_seed: 889434048 GCC: gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0 PyTorch: 1.13.1 PyTorch compiling details: PyTorch built with:
GCC 9.3
C++ Version: 201402
Intel(R) oneAPI Math Kernel Library Version 2023.0-Product Build 20221128 for Intel(R) 64 architecture applications
Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
OpenMP 201511 (a.k.a. OpenMP 4.5)
LAPACK is enabled (usually provided by MKL)
NNPACK is enabled
CPU capability usage: AVX2
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=0, USE_CUDNN=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
TorchVision: 0.14.1 OpenCV: 4.7.0 MMEngine: 0.5.0
Runtime environment: cudnn_benchmark: False mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0} dist_cfg: {'backend': 'nccl'} seed: None Distributed launcher: none Distributed training: False GPU number: 1 02/21 02:17:01 - mmengine - INFO - Config: model = dict( type='ODC', data_preprocessor=dict( mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), bgr_to_rgb=True), backbone=dict( type='ResNet', depth=50, in_channels=3, out_indices=[4], norm_cfg=dict(type='SyncBN')), neck=dict( type='ODCNeck', in_channels=2048, hid_channels=512, out_channels=256, with_avg_pool=True), head=dict( type='ClsHead', loss=dict(type='mmcls.CrossEntropyLoss'), with_avg_pool=False, in_channels=256, num_classes=10000), memory_bank=dict( type='ODCMemory', length=1281167, feat_dim=256, momentum=0.5, num_classes=10000, min_cluster=20, debug=False)) optimizer = dict(type='SGD', lr=0.06, weight_decay=1e-05, momentum=0.9) optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='SGD', lr=0.06, weight_decay=1e-05, momentum=0.9), paramwise_cfg=dict(custom_keys=dict(head=dict(momentum=0.0)))) param_scheduler = [ dict(type='MultiStepLR', by_epoch=True, milestones=[400], gamma=0.4) ] train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=440) default_scope = 'mmselfsup' default_hooks = dict( runtime_info=dict(type='RuntimeInfoHook'), timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=10, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook')) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl')) log_processor = dict( window_size=10, custom_cfg=[dict(data_src='', method='mean', window_size='global')]) vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='SelfSupVisualizer', vis_backends=[dict(type='LocalVisBackend')], name='visualizer') log_level = 'INFO' 
load_from = None resume = False dataset_type = 'CIFAR10' data_root = '/home/wangxin/cifar/' file_client_args = dict(backend='disk') train_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ] extract_pipeline = [ dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), dict(type='PackSelfSupInputs', meta_keys=['img_path']) ] train_dataloader = dict( batch_size=64, num_workers=4, persistent_workers=True, sampler=dict(type='DeepClusterSampler', shuffle=True, replace=True), collate_fn=dict(type='default_collate'), dataset=dict( type='CIFAR10', data_root='/home/wangxin/cifar/', ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=[ dict( type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict(type='RandomResizedCrop', size=224, backend='pillow'), dict(type='RandomFlip', prob=0.5), dict(type='RandomRotation', degrees=2), dict( type='ColorJitter', brightness=0.4, contrast=0.4, saturation=1.0, hue=0.5), dict( type='RandomGrayscale', prob=0.2, keep_channels=True, channel_weights=(0.114, 0.587, 0.2989)), dict( type='PackSelfSupInputs', algorithm_keys=['sample_idx'], meta_keys=['img_path']) ])) num_classes = 10000 custom_hooks = [ dict( type='DeepClusterHook', extract_dataloader=dict( batch_size=128, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=False, round_up=True), collate_fn=dict(type='default_collate'), dataset=dict( type='CIFAR10', 
data_root='/home/wangxin/cifar/', ann_file='meta/train.txt', data_prefix=dict(img_path='train/'), pipeline=[ dict( type='LoadImageFromFile', file_client_args=dict(backend='disk')), dict( type='mmcls.ResizeEdge', scale=256, edge='short', backend='pillow'), dict(type='CenterCrop', crop_size=224), dict(type='PackSelfSupInputs', meta_keys=['img_path']) ])), clustering=dict(type='Kmeans', k=10000, pca_dim=-1), unif_sampling=False, reweight=True, reweight_pow=0.5, init_memory=True, initial=True, interval=9999999999), dict( type='ODCHook', centroids_update_interval=10, deal_with_small_clusters_interval=1, evaluate_interval=50, reweight=True, reweight_pow=0.5) ] launcher = 'none' work_dir = './work_dirs/selfsup/odc_resnet50_8xb64-steplr-440e_cifar'
02/21 02:17:01 - mmengine - WARNING - The "visualizer" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 02/21 02:17:01 - mmengine - WARNING - The "vis_backend" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 02/21 02:17:02 - mmengine - WARNING - The "model" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 02/21 02:17:02 - mmengine - WARNING - The "model" registry in mmcls did not set import location. Fallback to call mmcls.utils.register_all_modules instead. 02/21 02:17:02 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used. 02/21 02:17:02 - mmengine - WARNING - The "hook" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 02/21 02:17:02 - mmengine - WARNING - The "dataset" registry in mmselfsup did not set import location. Fallback to call mmselfsup.utils.register_all_modules instead. 
Traceback (most recent call last): File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg obj = obj_cls(**args) # type: ignore File "/home/wangxin/mmselfsup/mmselfsup/engine/hooks/deepcluster_hook.py", line 47, in __init__ self.extractor = Extractor( File "/home/wangxin/mmselfsup/mmselfsup/models/utils/extractor.py", line 52, in __init__ self.data_loader = Runner.build_dataloader( File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1333, in build_dataloader dataset = DATASETS.build(dataset_cfg) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/registry.py", line 521, in build return self.build_func(cfg, *args, **kwargs, registry=self) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 100, in build_from_cfg raise KeyError( KeyError: 'CIFAR10 is not in the dataset registry. Please check whether the value of CIFAR10 is correct or it was registered as expected. More details can be found at https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#import-the-custom-module'
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "tools/train.py", line 99, in <module> main() File "tools/train.py", line 92, in main runner = Runner.from_cfg(cfg) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 431, in from_cfg runner = cls( File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 411, in __init__ self.register_hooks(default_hooks, custom_hooks) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1904, in register_hooks self.register_custom_hooks(custom_hooks) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1885, in register_custom_hooks self.register_hook(hook) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1786, in register_hook hook_obj = HOOKS.build(hook) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/registry.py", line 521, in build return self.build_func(cfg, *args, **kwargs, registry=self) File "/home/wangxin/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/registry/build_functions.py", line 135, in build_from_cfg raise type(e)( KeyError: "class DeepClusterHook in mmselfsup/engine/hooks/deepcluster_hook.py: 'CIFAR10 is not in the dataset registry. Please check whether the value of CIFAR10 is correct or it was registered as expected. More details can be found at https://mmengine.readthedocs.io/en/latest/advanced_tutorials/config.html#import-the-custom-module'"
It seems that 'CIFAR10' needs to be registered? Does mmselfsup have its own CIFAR10 dataset? How can I use CIFAR10 in ODC? Thanks for your help!