open-mmlab / mmdetection

OpenMMLab Detection Toolbox and Benchmark
https://mmdetection.readthedocs.io
Apache License 2.0
28.5k stars 9.28k forks source link

I cannot handle this issue, I need some help #11730

Open mistletoe111 opened 1 month ago

mistletoe111 commented 1 month ago

File "C:\Users\mistletoe\.conda\envs\d2l\lib\site-packages\mmdet\models\dense_heads\anchor_head.py", line 284, in _get_targets_single
    bbox_weights[pos_inds, :] = 1.0
RuntimeError: linearIndex.numel()*sliceSize*nElemBefore == expandedValue.numel() INTERNAL ASSERT FAILED at "C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\cuda\Indexing.cu":387, please report a bug to PyTorch. number of flattened indices did not match number of elements in the value tensor: 48 vs 12
Error in atexit._run_exitfuncs:

what the hell

mistletoe111 commented 1 month ago

Hello, this is my config:

auto_scale_lr = dict(base_batch_size=16, enable=False)
backend_args = None
classes = ( '0', )
data_root = 'data/'
dataset_type = 'CocoDataset'
default_hooks = dict( checkpoint=dict( by_epoch=False, interval=10000, max_keep_ckpts=1, type='CheckpointHook'), logger=dict(interval=50, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), sampler_seed=dict(type='DistSamplerSeedHook'), timer=dict(type='IterTimerHook'), visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict( cudnn_benchmark=False, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
launcher = 'none'
load_from = None
log_level = 'INFO'
log_processor = dict(by_epoch=False, type='LogProcessor', window_size=50)
max_iter = 90000
model = dict( backbone=dict( base_width=4, depth=101, frozen_stages=1, groups=64,

init_cfg=dict(checkpoint='open-mmlab://resnext101_64x4d', type='Pretrained'),

    norm_cfg=dict(requires_grad=True, type='BN'),
    norm_eval=True,
    num_stages=4,
    out_indices=(
        0,
        1,
        2,
        3,
    ),
    style='pytorch',
    type='ResNeXt'),
data_preprocessor=dict(
    bgr_to_rgb=True,
    mean=[
        123.675,
        116.28,
        103.53,
    ],
    pad_mask=True,
    pad_size_divisor=32,
    std=[
        58.395,
        57.12,
        57.375,
    ],
    type='DetDataPreprocessor'),
neck=dict(
    in_channels=[
        256,
        512,
        1024,
        2048,
    ],
    num_outs=5,
    out_channels=256,
    type='FPN'),
roi_head=dict(
    bbox_head=dict(
        bbox_coder=dict(
            target_means=[
                0.0,
                0.0,
                0.0,
                0.0,
            ],
            target_stds=[
                0.1,
                0.1,
                0.2,
                0.2,
            ],
            type='DeltaXYWHBBoxCoder'),
        fc_out_channels=1024,
        in_channels=256,
        loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
        loss_cls=dict(
            loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
        num_classes=1,
        reg_class_agnostic=False,
        roi_feat_size=7,
        type='Shared2FCBBoxHead'),
    bbox_roi_extractor=dict(
        featmap_strides=[
            4,
            8,
            16,
            32,
        ],
        out_channels=256,
        roi_layer=dict(output_size=7, sampling_ratio=0, type='RoIAlign'),
        type='SingleRoIExtractor'),
    mask_head=dict(
        conv_out_channels=256,
        in_channels=256,
        loss_mask=dict(
            loss_weight=1.0, type='CrossEntropyLoss', use_mask=True),
        num_classes=2,
        num_convs=4,
        type='FCNMaskHead'),
    mask_roi_extractor=dict(
        featmap_strides=[
            4,
            8,
            16,
            32,
        ],
        out_channels=256,
        roi_layer=dict(output_size=14, sampling_ratio=0, type='RoIAlign'),
        type='SingleRoIExtractor'),
    type='StandardRoIHead'),
rpn_head=dict(
    anchor_generator=dict(
        ratios=[
            0.5,
            1.0,
            2.0,
        ],
        scales=[
            8,
        ],
        strides=[
            4,
            8,
            16,
            32,
            64,
        ],
        type='AnchorGenerator'),
    bbox_coder=dict(
        target_means=[
            0.0,
            0.0,
            0.0,
            0.0,
        ],
        target_stds=[
            1.0,
            1.0,
            1.0,
            1.0,
        ],
        type='DeltaXYWHBBoxCoder'),
    feat_channels=256,
    in_channels=256,
    loss_bbox=dict(loss_weight=1.0, type='L1Loss'),
    loss_cls=dict(
        loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=True),
    type='RPNHead'),
test_cfg=dict(
    rcnn=dict(
        mask_thr_binary=0.5,
        max_per_img=100,
        nms=dict(iou_threshold=0.5, type='nms'),
        score_thr=0.05),
    rpn=dict(
        max_per_img=1000,
        min_bbox_size=0,
        nms=dict(iou_threshold=0.7, type='nms'),
        nms_pre=1000)),
train_cfg=dict(
    rcnn=dict(
        assigner=dict(
            ignore_iof_thr=-1,
            match_low_quality=True,
            min_pos_iou=0.5,
            neg_iou_thr=0.5,
            pos_iou_thr=0.5,
            type='MaxIoUAssigner'),
        debug=False,
        mask_size=28,
        pos_weight=-1,
        sampler=dict(
            add_gt_as_proposals=True,
            neg_pos_ub=-1,
            num=512,
            pos_fraction=0.25,
            type='RandomSampler')),
    rpn=dict(
        allowed_border=-1,
        assigner=dict(
            ignore_iof_thr=-1,
            match_low_quality=True,
            min_pos_iou=0.3,
            neg_iou_thr=0.3,
            pos_iou_thr=0.7,
            type='MaxIoUAssigner'),
        debug=False,
        pos_weight=-1,
        sampler=dict(
            add_gt_as_proposals=False,
            neg_pos_ub=-1,
            num=256,
            pos_fraction=0.5,
            type='RandomSampler')),
    rpn_proposal=dict(
        max_per_img=1000,
        min_bbox_size=0,
        nms=dict(iou_threshold=0.7, type='nms'),
        nms_pre=2000)),
type='MaskRCNN')

optim_wrapper = dict( clip_grad=dict(max_norm=1, norm_type=2), optimizer=dict( betas=( 0.9, 0.999, ), lr=6e-05, type='AdamW', weight_decay=0.0005), paramwise_cfg=dict(custom_keys=dict(norm=dict(decay_mult=0.0))), type='OptimWrapper') optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005) param_scheduler = [ dict( begin=0, by_epoch=False, end=1000, start_factor=0.001, type='LinearLR'), dict( begin=1000, by_epoch=False, end=90000, milestones=[ 60000, 72000, ], type='MultiStepLR'), ] resume = True test_cfg = dict(type='TestLoop') test_dataloader = dict( batch_size=5, dataset=dict( ann_file='youzi.json', backend_args=None, data_prefix=dict(img='images'), data_root='data/', metainfo=dict(classes=( 'youzi', )), pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( 512, 512, ), type='Resize'), dict( pad_val=dict(img=( 114, 114, 114, )), size=( 512, 512, ), type='Pad'), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', ), type='PackDetInputs'), ], test_mode=True, type='CocoDataset'), drop_last=False, num_workers=10, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict( ann_file='data/youzi.json', backend_args=None, format_only=False, metric=[ 'bbox', 'segm', ], proposal_nums=( 100, 1, 10, ), type='CocoMetric') test_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( 512, 512, ), type='Resize'), dict(pad_val=dict(img=( 114, 114, 114, )), size=( 512, 512, ), type='Pad'), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', ), type='PackDetInputs'), ] train_cfg = dict(max_iters=90000, type='IterBasedTrainLoop', val_interval=1000) train_dataloader = dict( batch_sampler=None, batch_size=1, dataset=dict( ann_file='youzi.json', backend_args=None, data_prefix=dict(img='images/'), data_root='data/', filter_cfg=dict(filter_empty_gt=True, min_size=32), metainfo=dict(classes=( 
'youzi', )), pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict( poly2mask=False, type='LoadAnnotations', with_bbox=True, with_mask=False), dict(img_scale=( 256, 256, ), pad_val=114.0, type='CachedMosaic'), dict( keep_ratio=True, ratio_range=( 0.1, 2.0, ), scale=( 1280, 1280, ), type='RandomResize'), dict( allow_negative_crop=True, crop_size=( 512, 512, ), recompute_bbox=True, type='RandomCrop'), dict(type='YOLOXHSVRandomAug'), dict(prob=0.5, type='RandomFlip'), dict( pad_val=dict(img=( 114, 114, 114, )), size=( 512, 512, ), type='Pad'), dict( img_scale=( 512, 512, ), max_cached_images=20, pad_val=( 114, 114, 114, ), ratio_range=( 1.0, 1.0, ), type='CachedMixUp'), dict(min_gt_bbox_wh=( 1, 1, ), type='FilterAnnotations'), dict(type='PackDetInputs'), ], type='CocoDataset'), num_workers=10, persistent_workers=True, pin_memory=True, sampler=dict(shuffle=True, type='DefaultSampler')) train_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict( poly2mask=False, type='LoadAnnotations', with_bbox=True, with_mask=True), dict(img_scale=( 512, 512, ), pad_val=114.0, type='CachedMosaic'), dict( keep_ratio=True, ratio_range=( 0.1, 2.0, ), scale=( 1280, 1280, ), type='RandomResize'), dict( allow_negative_crop=True, crop_size=( 512, 512, ), recompute_bbox=True, type='RandomCrop'), dict(type='YOLOXHSVRandomAug'), dict(prob=0.5, type='RandomFlip'), dict(pad_val=dict(img=( 114, 114, 114, )), size=( 512, 512, ), type='Pad'), dict( img_scale=( 512, 512, ), max_cached_images=20, pad_val=( 114, 114, 114, ), ratio_range=( 1.0, 1.0, ), type='CachedMixUp'), dict(min_gt_bbox_wh=( 1, 1, ), type='FilterAnnotations'), dict(type='PackDetInputs'), ] train_pipeline_stage2 = [ dict(backend_args=None, type='LoadImageFromFile'), dict( poly2mask=False, type='LoadAnnotations', with_bbox=True, with_mask=True), dict( keep_ratio=True, ratio_range=( 0.1, 2.0, ), scale=( 512, 512, ), type='RandomResize'), dict( allow_negative_crop=True, crop_size=( 512, 512, ), 
recompute_bbox=True, type='RandomCrop'), dict(min_gt_bbox_wh=( 1, 1, ), type='FilterAnnotations'), dict(type='YOLOXHSVRandomAug'), dict(prob=0.5, type='RandomFlip'), dict(pad_val=dict(img=( 114, 114, 114, )), size=( 512, 512, ), type='Pad'), dict(type='PackDetInputs'), ] tta_model = dict( tta_cfg=dict(max_per_img=100, nms=dict(iou_threshold=0.6, type='nms')), type='DetTTAModel') tta_pipeline = [ dict(backend_args=None, type='LoadImageFromFile'), dict( transforms=[ [ dict(keep_ratio=True, scale=( 512, 512, ), type='Resize'), dict(keep_ratio=True, scale=( 256, 256, ), type='Resize'), dict(keep_ratio=True, scale=( 1024, 1024, ), type='Resize'), ], [ dict(prob=1.0, type='RandomFlip'), dict(prob=0.0, type='RandomFlip'), ], [ dict( pad_val=dict(img=( 114, 114, 114, )), size=( 960, 960, ), type='Pad'), ], [ dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'flip', 'flip_direction', ), type='PackDetInputs'), ], ], type='TestTimeAug'), ] val_cfg = dict(type='ValLoop') val_dataloader = dict( batch_size=5, dataset=dict( ann_file='youzi.json', backend_args=None, data_prefix=dict(img='images'), data_root='data/', filter_cfg=dict(filter_empty_gt=True, min_size=32), metainfo=dict(classes=( 'youzi', )), pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(keep_ratio=True, scale=( 512, 512, ), type='Resize'), dict( pad_val=dict(img=( 114, 114, 114, )), size=( 512, 512, ), type='Pad'), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', ), type='PackDetInputs'), ], test_mode=True, type='CocoDataset'), drop_last=False, num_workers=10, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict( ann_file='data/youzi.json', backend_args=None, format_only=False, metric=[ 'bbox', 'segm', ], type='CocoMetric') vis_backends = [ dict(type='LocalVisBackend'), ] visualizer = dict( name='visualizer', type='DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), ]) 
work_dir = 'results/1_mask-rcnn_x101-64x4d_fpn_ms-poly_3x_coco'