When I use the website to check our results, it shows an error like the one in the figure,
which says "IndexError: index 20021 is out of bounds for dimension 0 with size 16128".
How can I fix this problem?
Here is my config file:
# Base config this file inherits from.  MMEngine only recognizes the reserved
# name `_base_` (with leading/trailing underscores); a plain `base` variable is
# ignored and inheritance breaks.  NOTE(review): the underscores were almost
# certainly stripped by markdown italics when this config was pasted.
_base_ = ('../../third_party/mmyolo/configs/yolov8/'
          'yolov8_l_syncbn_fast_8xb16-500e_auo.py')

# Register the `yolo_world` package so its custom modules (detector, neck,
# head, transforms) are importable; fail loudly instead of skipping silently.
custom_imports = dict(imports=['yolo_world'], allow_failed_imports=False)
When I use the website trying to check our results, it will show an error like the figure, and the error said "IndexError: index 20021 is out of bounds for dimension 0 with size 16128" How can I fix this problem? And this is my code about configs file base = ('../../third_party/mmyolo/configs/yolov8/' 'yolov8_l_syncbn_fast_8xb16-500e_auo.py') custom_imports = dict(imports=['yolo_world'], allow_failed_imports=False)
# ---- schedule / model hyper-parameters -------------------------------------
num_classes = 4            # number of classes at test time
num_training_classes = 4   # number of classes during training
max_epochs = 40            # total training epochs
close_mosaic_epochs = 30   # mosaic aug is switched off for the last N epochs
save_epoch_intervals = 5   # checkpoint every N epochs
text_channels = 512        # CLIP text embedding width

# `_base_.last_stage_out_channels` is resolved by the mmengine config parser;
# the variable must be spelled `_base_`, not `base`.
neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2]
neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32]

base_lr = 1e-3
weight_decay = 0.0005
train_batch_size_per_gpu = 16
# Pretrained YOLO-World weights to fine-tune from.  The key must be spelled
# `load_from`: with the original typo (`load_fron`) mmengine silently ignores
# it and training starts from random initialization.
load_from = 'pretrained_models/yolo_world_v2_l_obj365v1_goldg_pretrain_1280ft-9babe3f6.pth'

# CLIP text encoder used by the text branch (kept frozen below).
text_model_name = 'openai/clip-vit-base-patch32'
persistent_workers = False
# YOLO-World detector: YOLOv8 image backbone + frozen CLIP text encoder.
# mmengine reserved names matter here: `_delete_=True` replaces the base
# config's dict instead of merging into it, and `{{_base_.model.backbone}}`
# is substituted by the config parser before evaluation.
model = dict(
    type='YOLOWorldDetector',
    mm_neck=True,
    num_train_classes=num_training_classes,
    num_test_classes=num_classes,
    data_preprocessor=dict(type='YOLOWDetDataPreprocessor'),
    backbone=dict(
        _delete_=True,
        type='MultiModalYOLOBackbone',
        image_model={{_base_.model.backbone}},
        text_model=dict(
            type='HuggingCLIPLanguageBackbone',
            model_name=text_model_name,
            frozen_modules=['all'])),  # keep the CLIP weights frozen
    neck=dict(
        type='YOLOWorldPAFPN',
        guide_channels=text_channels,
        embed_channels=neck_embed_channels,
        num_heads=neck_num_heads,
        block_cfg=dict(type='MaxSigmoidCSPLayerWithTwoConv')),
    bbox_head=dict(
        type='YOLOWorldHead',
        head_module=dict(
            type='YOLOWorldHeadModule',
            use_bn_head=True,
            embed_dims=text_channels,
            num_classes=num_training_classes)),
    train_cfg=dict(assigner=dict(num_classes=num_training_classes)))
# Text-aware transforms: sample negative class texts and pack the prompts
# together with the detection inputs.
text_transform = [
    dict(type='RandomLoadText',
         num_neg_samples=(num_classes, num_classes),
         max_num_samples=num_training_classes,
         padding_to_max=True,
         padding_value=''),
    dict(type='mmdet.PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                    'flip_direction', 'texts'))
]

mosaic_affine_transform = [
    dict(type='MultiModalMosaic',
         img_scale=_base_.img_scale,
         pad_val=114.0,
         pre_transform=_base_.pre_transform),
    dict(type='YOLOv5RandomAffine',
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
         max_aspect_ratio=100.,
         scaling_ratio_range=(1 - _base_.affine_scale,
                              1 + _base_.affine_scale),
         border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
         border_val=(114, 114, 114))
]

# The sub-pipelines above are lists, so they must be splatted (`*`) into the
# final pipeline; without the `*` the pipeline contains nested lists and the
# transform builder crashes.  NOTE(review): the asterisks were probably eaten
# as markdown italics when this config was pasted — also check `base`→`_base_`.
train_pipeline = [
    *_base_.pre_transform,
    *mosaic_affine_transform,
    dict(type='YOLOv5MultiModalMixUp',
         prob=_base_.mixup_prob,
         pre_transform=[*_base_.pre_transform, *mosaic_affine_transform]),
    *_base_.last_transform[:-1],
    *text_transform
]
# Stage-2 pipeline (after mosaic is closed): base pipeline minus its final
# packing step, plus the text transforms.
train_pipeline_stage2 = [*_base_.train_pipeline_stage2[:-1], *text_transform]
# Class names MUST be an ordered sequence (tuple/list), not a set: a set has
# arbitrary iteration order, so class indices would not line up with the
# COCO annotation category ids — a likely cause of out-of-bounds index errors
# at evaluation time.
metainfo = dict(classes=('Vertical Line', 'Horizontal Line',
                         'Horizontal Block', 'Target'))

coco_train_dataset = dict(
    _delete_=True,  # replace (not merge with) the base config's train dataset
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        metainfo=metainfo,
        data_root='data/auo',
        ann_file='annotations/ann_train.json',
        data_prefix=dict(img='dataset/train/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path='data/texts/auo_class_texts.json',
    pipeline=train_pipeline)
train_dataloader = dict(
    persistent_workers=persistent_workers,
    batch_size=train_batch_size_per_gpu,
    collate_fn=dict(type='yolow_collate'),
    dataset=coco_train_dataset)

# Test-time pipeline: everything from the base pipeline except its final
# packing step, then load the class texts and pack inputs including `texts`.
test_pipeline = [
    *_base_.test_pipeline[:-1],
    dict(type='LoadText'),
    dict(type='mmdet.PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                    'scale_factor', 'pad_param', 'texts'))
]

coco_val_dataset = dict(
    _delete_=True,  # replace the base config's val dataset wholesale
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        metainfo=metainfo,
        data_root='data/auo',
        ann_file='annotations/ann_val.json',
        data_prefix=dict(img='dataset/val/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path='data/texts/auo_class_texts.json',
    pipeline=test_pipeline)

val_dataloader = dict(dataset=coco_val_dataset)
test_dataloader = val_dataloader
default_hooks = dict(
    param_scheduler=dict(
        scheduler_type='linear',
        lr_factor=0.01,
        max_epochs=max_epochs),
    checkpoint=dict(
        max_keep_ckpts=-1,  # keep every checkpoint
        save_best=None,
        interval=save_epoch_intervals))

custom_hooks = [
    dict(type='EMAHook',
         ema_type='ExpMomentumEMA',
         momentum=0.0001,
         update_buffers=True,
         strict_load=False,
         priority=49),
    # Switch to the non-mosaic pipeline for the last `close_mosaic_epochs`.
    dict(type='mmdet.PipelineSwitchHook',
         switch_epoch=max_epochs - close_mosaic_epochs,
         switch_pipeline=train_pipeline_stage2)
]

train_cfg = dict(
    max_epochs=max_epochs,
    val_interval=5,
    dynamic_intervals=[((max_epochs - close_mosaic_epochs),
                        _base_.val_interval_stage2)])

optim_wrapper = dict(
    optimizer=dict(
        _delete_=True,  # replace the base optimizer entirely
        type='SGD',
        lr=base_lr,
        momentum=0.937,
        nesterov=True,
        weight_decay=weight_decay,
        batch_size_per_gpu=train_batch_size_per_gpu),
    paramwise_cfg=dict(
        custom_keys={
            # fine-tune the CLIP text model 100x slower than the rest
            'backbone.text_model': dict(lr_mult=0.01),
            'logit_scale': dict(weight_decay=0.0)
        }),
    constructor='YOLOWv5OptimizerConstructor')
# COCO-style bbox mAP on the validation annotations.
# `_delete_=True` (reserved mmengine key — not `delete`) replaces the base
# config's evaluator instead of merging into it.
val_evaluator = dict(
    _delete_=True,
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
    ann_file='data/auo/annotations/ann_val.json',
    metric='bbox')
Thanks a lot!