Closed NickYangMin closed 2 years ago
Sorry for taking so long to get back to you. We will solve the related issues you raised as soon as possible.
Any update on this issue? I also met the same issue.
Hi! Is there an error when running the following code?
from mmdet.datasets import build_dataset
from mmcv import Config, DictAction, ConfigDict
img_scale = (640, 640)
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
train_pipeline = [
dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
dict(
type='RandomAffine',
scaling_ratio_range=(0.1, 2),
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(
type='MixUp',
img_scale=img_scale,
ratio_range=(0.8, 1.6),
pad_val=114.0),
dict(type='YOLOXHSVRandomAug'),
dict(type='RandomFlip', flip_ratio=0.5),
# According to the official implementation, multi-scale
# training is not considered here but in the
# 'mmdet/models/detectors/yolox.py'.
dict(type='Resize', img_scale=img_scale, keep_ratio=True),
dict(
type='Pad',
pad_to_square=True,
# If the image is three-channel, the pad value needs
# to be set separately for each channel.
pad_val=dict(img=(114.0, 114.0, 114.0))),
dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
train_dataset = dict(
type='MultiImageMixDataset',
dataset=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True)
],
filter_empty_gt=False),
pipeline=train_pipeline)
data = ConfigDict(
samples_per_gpu=8,
workers_per_gpu=4,
persistent_workers=True,
train=train_dataset)
datasets = [build_dataset(data.train)]
After I switched the code to 0.3.0, the error disappeared, but I got another error: AutoSlim: Our current StructurePruner does not support pruning this architecture. StructurePruner is not perfect enough to handle all the corner cases. We will appreciate it if you create a issue. File "xxx/mmrazor_superacme_0.3.0/mmrazor/models/algorithms/autoslim.py", line 69, in _init_pruner pseudo_pruner.prepare_from_supernet(pseudo_architecture) File "xxx/mmrazor/models/pruners/structure_pruning.py", line 152, in prepare_from_supernet pseudo_img = supernet.forward_dummy(pseudo_img) File "xxx/mmrazor_superacme_0.3.0/mmrazor/models/architectures/base.py", line 21, in forward_dummy return self.model.forward_dummy(img) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/detectors/single_stage.py", line 53, in forward_dummy x = self.extract_feat(img) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/detectors/single_stage.py", line 45, in extract_feat x = self.neck(x) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, **kwargs) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/necks/yolox_pafpn.py", line 133, in forward feat_heigh) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, **kwargs)
Is there an error when running the following code?
from mmcv import ConfigDict
img_scale = (640, 640)
model = dict(
type='mmdet.YOLOX',
input_size=img_scale,
random_size_range=(15, 25),
random_size_interval=10,
backbone=dict(type='CSPDarknet', deepen_factor=0.33, widen_factor=0.5),
neck=dict(
type='YOLOXPAFPN',
in_channels=[128, 256, 512],
out_channels=128,
num_csp_blocks=1),
bbox_head=dict(
type='YOLOXHead', num_classes=80, in_channels=128, feat_channels=128),
train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
# In order to align the source code, the threshold of the val phase is
# 0.01, and the threshold of the test phase is 0.001.
test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
algorithm_cfg = ConfigDict(
type='AutoSlim',
architecture=dict(type='MMDetArchitecture', model=model),
pruner=dict(
type='RatioPruner',
ratios=(2 / 12, 3 / 12, 4 / 12, 5 / 12, 6 / 12, 7 / 12, 8 / 12, 9 / 12,
10 / 12, 11 / 12, 1.0)),
retraining=False,
bn_training_mode=True,
input_shape=None)
algorithm = build_algorithm(algorithm_cfg)
Yes, the error still exists.
发生异常: NotImplementedError (note: full exception trace is shown but execution is paused at: _run_module_as_main) AutoSlim: Our current StructurePruner does not support pruning this architecture. StructurePruner is not perfect enough to handle all the corner cases. We will appreciate it if you create a issue. File "xxx/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/algorithms/autoslim.py", line 76, in _init_pruner pseudo_architecture.forward_dummy(pseudo_img) File "xxx/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/architectures/base.py", line 21, in forward_dummy return self.model.forward_dummy(img) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/detectors/single_stage.py", line 53, in forward_dummy x = self.extract_feat(img) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/detectors/single_stage.py", line 43, in extract_feat x = self.backbone(img) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/backbones/csp_darknet.py", line 281, in forward x = layer(x) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/container.py", line 139, in forward input = module(input) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/utils/csp_layer.py", line 150, in forward return self.final_conv(x_final) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File 
"/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmcv/cnn/bricks/conv_module.py", line 201, in forward x = self.conv(x) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, *kwargs) File "xxx/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/pruners/structure_pruning.py", line 433, in modified_forward feature = feature self.in_mask
发生异常: NotImplementedError (note: full exception trace is shown but execution is paused at: _run_module_as_main) AutoSlim: Our current StructurePruner does not support pruning this architecture. StructurePruner is not perfect enough to handle all the corner cases. We will appreciate it if you create a issue. File "xxx/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/algorithms/autoslim.py", line 76, in _init_pruner pseudo_architecture.forward_dummy(pseudo_img) File "xxx/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/architectures/base.py", line 21, in forward_dummy return self.model.forward_dummy(img) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/detectors/single_stage.py", line 53, in forward_dummy x = self.extract_feat(img) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/detectors/single_stage.py", line 43, in extract_feat x = self.backbone(img) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/backbones/csp_darknet.py", line 281, in forward x = layer(x) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/container.py", line 139, in forward input = module(input) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/utils/csp_layer.py", line 150, in forward return self.final_conv(x_final) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File 
"/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmcv/cnn/bricks/conv_module.py", line 201, in forward x = self.conv(x) File "xxx/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, *kwargs) File "xxx/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/pruners/structure_pruning.py", line 433, in modified_forward feature = feature self.in_mask
I can run the above code normally on both cpu and cuda. Is this the whole error log? It seems that something is left.
Whole log:
/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.) return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode) Traceback (most recent call last): File "/home/yangmin/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/algorithms/autoslim.py", line 76, in _init_pruner pseudo_architecture.forward_dummy(pseudo_img) File "/home/yangmin/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/architectures/base.py", line 21, in forward_dummy return self.model.forward_dummy(img) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/detectors/single_stage.py", line 53, in forward_dummy x = self.extract_feat(img) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/detectors/single_stage.py", line 43, in extract_feat x = self.backbone(img) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/backbones/csp_darknet.py", line 281, in forward x = layer(x) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/container.py", line 139, in forward input = module(input) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File 
"/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmdet/models/utils/csp_layer.py", line 150, in forward return self.final_conv(x_final) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmcv/cnn/bricks/conv_module.py", line 201, in forward x = self.conv(x) File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, *kwargs) File "/home/yangmin/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/pruners/structure_pruning.py", line 433, in modified_forward feature = feature self.in_mask RuntimeError: The size of tensor a (37) must match the size of tensor b (64) at non-singleton dimension 1
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/home/yangmin/anaconda3/envs/openmmlab/lib/python3.7/site-packages/mmcv/utils/registry.py", line 52, in build_from_cfg return obj_cls(**args) File "/home/yangmin/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/algorithms/autoslim.py", line 42, in __init__ super(AutoSlim, self).__init__(**kwargs) File "/home/yangmin/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/algorithms/base.py", line 56, in __init__ self._init_pruner(pruner) File "/home/yangmin/share/openmmlab/mmrazor_superacme_0.3.0/mmrazor/models/algorithms/autoslim.py", line 78, in _init_pruner raise NotImplementedError('Our current StructurePruner does not ' NotImplementedError: Our current StructurePruner does not support pruning this architecture. StructurePruner is not perfect enough to handle all the corner cases. We will appreciate it if you create a issue.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "configs/pruning/yolox_algorithm.py", line 32, in
In prepare_from_supernet in structure_pruning.py, the following code executes without error: pseudo_img = torch.randn(1, 3, 224, 224)
pseudo_img = supernet.forward_dummy(pseudo_img)
But in autoslim.py, after sample_subnet/set_subnet/export_subnet/deploy_subnet, "forward_dummy" fails to execute: pseudo_pruner = build_pruner(pruner) pseudo_architecture = copy.deepcopy(self.architecture) pseudo_pruner.prepare_from_supernet(pseudo_architecture) subnet_dict = pseudo_pruner.sample_subnet() pseudo_pruner.set_subnet(subnet_dict) subnet_dict = pseudo_pruner.export_subnet() pseudo_pruner.deploy_subnet(pseudo_architecture, subnet_dict) pseudo_img = torch.randn(1, 3, 224, 224) pseudo_architecture.forward_dummy(pseudo_img)
In the forward method of conv_module.py, I got: x.shape = torch.Size([1,29,56,56]) and self.conv = Conv2d(64, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
The error occurs when executing self.conv(x).
backbone.stage1.1.main_conv.conv: out_channel=24 backbone.stage1.1.short_conv.conv: out_channel=5 backbone.stage1.1.final_conv.conv: in_channel=64
It seems that there is an error when dealing with concat?
Sorry, I made a mistake, I changed some code about concat.
@HIT-cwh Can you give me a complete config for YOLOX AutoSlim?
New error:
Traceback (most recent call last):
File "tools/mmdet/train_mmdet.py", line 211, in
OK, it works, Thanks!
OK, it works, Thanks!
Due to a lack of manpower, the progress of transferring AutoSlim to other tasks is not very satisfactory. And I'm very sorry for the inconvenience to you.
I met this error with pytorch 1.10.2, but works fine with pytorch 1.10.0.
Hi, I met the same issue using mmrazor-0.3.0 and mmdet-2.23.0. When running the code provided by @HIT-cwh, I met exactly the same error as @NickYangMin.
Could you please provide some information or suggestions about how to solve this issue? Thanks!
I met this error with pytorch 1.10.2, but works fine with pytorch 1.10.0.
Hi, I met the same issue using mmrazor-0.3.0 and mmdet-2.23.0. When running the code provided by @HIT-cwh, I met exactly the same error as @NickYangMin. Could you please provide some information or suggestions about how to solve this issue? Thanks!
I'm sorry for the inconvenience caused to you. The auto-trace in the pruner goes wrong with pytorch 1.10.2 and we will fix the errors in the code as soon as possible.
In order not to affect your work, could you temporarily avoid pytorch 1.8.1, 1.10.2, and 1.11.0? All other versions should be OK.
I met this error with pytorch 1.10.2, but works fine with pytorch 1.10.0.
Hi, I met the same issue using mmrazor-0.3.0 and mmdet-2.23.0. When running the code provided by @HIT-cwh, I met exactly the same error as @NickYangMin. Could you please provide some information or suggestions about how to solve this issue? Thanks!
I'm sorry for the inconvenience caused to you. The auto-trace in the pruner goes wrong with pytorch 1.10.2 and we will fix the errors in the code as soon as possible.
In order not to affect your work, could you temporarily avoid pytorch 1.8.1, 1.10.2, and 1.11.0? All other versions should be OK.
Everything seems fine for me when using Pytorch 1.10.0. Thanks!
I made a config for YOLOX to use AutoSlim, but got an error:
error:
Traceback (most recent call last): File "tools/mmdet/train_mmdet.py", line 199, in
main()
File "tools/mmdet/train_mmdet.py", line 175, in main
datasets = [build_dataset(cfg.data.train)]
File "/home/yangmin/share/openmmlab/mmdetection/mmdet/datasets/builder.py", line 77, in build_dataset
dataset = MultiImageMixDataset(**cp_cfg)
TypeError: __init__() got an unexpected keyword argument 'ann_file'
config
########################################### _base_ = [ '../../_base_/datasets/mmdet/coco_detection.py', '../../_base_/schedules/mmdet/schedule_1x.py', '../../_base_/mmdet_runtime.py' ]
img_scale = (640, 640)
model = dict( type='mmdet.YOLOX', input_size=img_scale, random_size_range=(15, 25), random_size_interval=10, backbone=dict(type='CSPDarknet', deepen_factor=0.33, widen_factor=0.5), neck=dict( type='YOLOXPAFPN', in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1), bbox_head=dict( type='YOLOXHead', num_classes=80, in_channels=128, feat_channels=128), train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
In order to align the source code, the threshold of the val phase is
data_root = 'data/coco/' dataset_type = 'CocoDataset'
train_pipeline = [ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0), dict( type='RandomAffine', scaling_ratio_range=(0.1, 2), border=(-img_scale[0] // 2, -img_scale[1] // 2)), dict( type='MixUp', img_scale=img_scale, ratio_range=(0.8, 1.6), pad_val=114.0), dict(type='YOLOXHSVRandomAug'), dict(type='RandomFlip', flip_ratio=0.5),
According to the official implementation, multi-scale
]
train_dataset = dict( type='MultiImageMixDataset', dataset=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_train2017.json', img_prefix=data_root + 'train2017/', pipeline=[ dict(type='LoadImageFromFile'), dict(type='LoadAnnotations', with_bbox=True) ], filter_empty_gt=False, ), pipeline=train_pipeline)
test_pipeline = [ dict(type='LoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=img_scale, flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict( type='Pad', pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), dict(type='DefaultFormatBundle'), dict(type='Collect', keys=['img']) ]) ]
data = dict( samples_per_gpu=8, workers_per_gpu=4, persistent_workers=True, train=train_dataset, val=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', pipeline=test_pipeline), test=dict( type=dataset_type, ann_file=data_root + 'annotations/instances_val2017.json', img_prefix=data_root + 'val2017/', pipeline=test_pipeline))
optimizer = dict( type='SGD', lr=0.01, momentum=0.9, weight_decay=5e-4, nesterov=True, paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.)) optimizer_config = dict(grad_clip=None)
max_epochs = 300 num_last_epochs = 15 resume_from = None interval = 10
lr_config = dict( _delete_=True, policy='YOLOX', warmup='exp', by_epoch=False, warmup_by_epoch=True, warmup_ratio=1, warmup_iters=5, # 5 epoch num_last_epochs=num_last_epochs, min_lr_ratio=0.05)
runner = dict(type='EpochBasedRunner', max_epochs=max_epochs)
custom_hooks = [ dict( type='YOLOXModeSwitchHook', num_last_epochs=num_last_epochs, priority=48), dict( type='SyncNormHook', num_last_epochs=num_last_epochs, interval=interval, priority=48), dict( type='ExpMomentumEMAHook', resume_from=resume_from, momentum=0.0001, priority=49) ] checkpoint_config = dict(interval=interval) evaluation = dict( save_best='auto',
The evaluation interval is 'interval' when running epoch is
log_config = dict(interval=50)
algorithm = dict( type='AutoSlim', architecture=dict(type='MMDetArchitecture', model=model),
distiller=dict(
runner = dict(type='EpochBasedRunner', max_epochs=50)
use_ddp_wrapper = True ###############################