Open lin-whale opened 5 months ago
The training output:
]
use_mask2refine = True
val_ann_file = 'test/2024-03-28-ann-cvat/annotations/annotations.json'
val_batch_size_per_gpu = 1
val_cfg = dict(type='ValLoop')
val_data_prefix = 'test/2024-03-28-ann-cvat/images/'
val_dataloader = dict(
batch_size=1,
dataset=dict(
ann_file=
'/data/cvat/train/2024-04-02-ann-cvat/annotations/annotations.json.test',
batch_shapes_cfg=None,
data_prefix=dict(img='images/'),
data_root='/data/cvat/train/2024-04-02-ann-cvat',
filter_cfg=dict(filter_empty_gt=False, min_size=32),
metainfo=dict(
classes=(
'floor',
'person',
'paper',
'bottle',
'paper cup',
...
)),
pipeline=[
dict(backend_args=None, type='LoadImageFromFile'),
dict(scale=(
640,
640,
), type='YOLOv5KeepRatioResize'),
dict(
allow_scale_up=False,
pad_val=dict(img=114),
scale=(
640,
640,
),
...
Done (t=0.20s)
creating index...
index created!
04/24 01:45:39 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io
04/24 01:45:39 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future.
04/24 01:45:39 - mmengine - INFO - Checkpoints will be saved to /home/aistar/yolo-world/YOLO-World/work_dirs/yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco.
/opt/conda/lib/python3.10/site-packages/torch/functional.py:507: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at /opt/conda/conda-bld/pytorch_1711403380909/work/aten/src/ATen/native/TensorShape.cpp:3549.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
04/24 01:45:59 - mmengine - INFO - Epoch(train) [1][ 50/290] base_lr: 2.0000e-03 lr: 9.8000e-05 eta: 3:15:16 time: 0.4047 data_time: 0.0721 memory: 10524 grad_norm: 0.0000 loss: 209.2231 loss_cls: 85.0021 loss_bbox: 57.6584 loss_dfl: 66.5625
04/24 01:46:11 - mmengine - INFO - Epoch(train) [1][100/290] base_lr: 2.0000e-03 lr: 1.9800e-04 eta: 2:36:04 time: 0.2433 data_time: 0.0477 memory: 4760 grad_norm: 0.0000 loss: 208.6729 loss_cls: 84.9814 loss_bbox: 57.1290 loss_dfl: 66.5625
04/24 01:46:20 - mmengine - INFO - Epoch(train) [1][150/290] base_lr: 2.0000e-03 lr: 2.9800e-04 eta: 2:12:05 time: 0.1761 data_time: 0.0129 memory: 4706 grad_norm: 0.0000 loss: 208.2608 loss_cls: 84.9480 loss_bbox: 56.7503 loss_dfl: 66.5625
04/24 01:46:29 - mmengine - INFO - Epoch(train) [1][200/290] base_lr: 2.0000e-03 lr: 3.9800e-04 eta: 2:00:17 time: 0.1783 data_time: 0.0173 memory: 4880 grad_norm: 0.0000 loss: 208.5585 loss_cls: 84.8515 loss_bbox: 57.1445 loss_dfl: 66.5625
04/24 01:46:37 - mmengine - INFO - Epoch(train) [1][250/290] base_lr: 2.0000e-03 lr: 4.9800e-04 eta: 1:52:12 time: 0.1685 data_time: 0.0032 memory: 4640 grad_norm: 0.0000 loss: 208.4256 loss_cls: 84.9207 loss_bbox: 56.9424 loss_dfl: 66.5625
04/24 01:46:45 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:46:57 - mmengine - INFO - Epoch(train) [2][ 50/290] base_lr: 2.0000e-03 lr: 6.7129e-04 eta: 1:48:58 time: 0.2320 data_time: 0.0606 memory: 4920 grad_norm: 0.0000 loss: 208.7070 loss_cls: 85.1418 loss_bbox: 57.0027 loss_dfl: 66.5625
04/24 01:47:05 - mmengine - INFO - Epoch(train) [2][100/290] base_lr: 2.0000e-03 lr: 7.7030e-04 eta: 1:45:39 time: 0.1772 data_time: 0.0228 memory: 4640 grad_norm: 0.0000 loss: 209.1946 loss_cls: 85.3240 loss_bbox: 57.3081 loss_dfl: 66.5625
04/24 01:47:14 - mmengine - INFO - Epoch(train) [2][150/290] base_lr: 2.0000e-03 lr: 8.6931e-04 eta: 1:43:13 time: 0.1798 data_time: 0.0082 memory: 4706 grad_norm: 0.0000 loss: 208.6277 loss_cls: 85.0666 loss_bbox: 56.9986 loss_dfl: 66.5625
04/24 01:47:23 - mmengine - INFO - Epoch(train) [2][200/290] base_lr: 2.0000e-03 lr: 9.6832e-04 eta: 1:40:57 time: 0.1738 data_time: 0.0115 memory: 4560 grad_norm: 0.0000 loss: 209.1859 loss_cls: 85.0124 loss_bbox: 57.6110 loss_dfl: 66.5625
04/24 01:47:32 - mmengine - INFO - Epoch(train) [2][250/290] base_lr: 2.0000e-03 lr: 1.0673e-03 eta: 1:39:16 time: 0.1781 data_time: 0.0104 memory: 4520 grad_norm: 0.0000 loss: 208.7904 loss_cls: 85.1610 loss_bbox: 57.0668 loss_dfl: 66.5625
04/24 01:47:39 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:47:51 - mmengine - INFO - Epoch(train) [3][ 50/290] base_lr: 2.0000e-03 lr: 1.2331e-03 eta: 1:39:06 time: 0.2327 data_time: 0.0540 memory: 4760 grad_norm: 0.0000 loss: 209.1261 loss_cls: 85.0435 loss_bbox: 57.5202 loss_dfl: 66.5625
04/24 01:48:00 - mmengine - INFO - Epoch(train) [3][100/290] base_lr: 2.0000e-03 lr: 1.3311e-03 eta: 1:37:54 time: 0.1798 data_time: 0.0118 memory: 4560 grad_norm: 0.0000 loss: 208.6577 loss_cls: 84.9895 loss_bbox: 57.1057 loss_dfl: 66.5625
04/24 01:48:09 - mmengine - INFO - Epoch(train) [3][150/290] base_lr: 2.0000e-03 lr: 1.4291e-03 eta: 1:36:45 time: 0.1771 data_time: 0.0109 memory: 4813 grad_norm: 0.0000 loss: 208.6249 loss_cls: 84.9350 loss_bbox: 57.1273 loss_dfl: 66.5625
04/24 01:48:18 - mmengine - INFO - Epoch(train) [3][200/290] base_lr: 2.0000e-03 lr: 1.5272e-03 eta: 1:35:39 time: 0.1746 data_time: 0.0081 memory: 4507 grad_norm: 0.0000 loss: 208.9671 loss_cls: 85.0744 loss_bbox: 57.3302 loss_dfl: 66.5625
04/24 01:48:26 - mmengine - INFO - Epoch(train) [3][250/290] base_lr: 2.0000e-03 lr: 1.6252e-03 eta: 1:34:13 time: 0.1587 data_time: 0.0127 memory: 4666 grad_norm: 0.0000 loss: 208.3120 loss_cls: 84.8422 loss_bbox: 56.9073 loss_dfl: 66.5625
04/24 01:48:34 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:48:45 - mmengine - INFO - Epoch(train) [4][ 50/290] base_lr: 2.0000e-03 lr: 1.7834e-03 eta: 1:34:26 time: 0.2219 data_time: 0.0478 memory: 4733 grad_norm: 0.0000 loss: 208.5537 loss_cls: 84.9261 loss_bbox: 57.0651 loss_dfl: 66.5625
04/24 01:48:54 - mmengine - INFO - Epoch(train) [4][100/290] base_lr: 2.0000e-03 lr: 1.8804e-03 eta: 1:33:37 time: 0.1745 data_time: 0.0155 memory: 4666 grad_norm: 0.0000 loss: 208.6375 loss_cls: 85.1082 loss_bbox: 56.9668 loss_dfl: 66.5625
04/24 01:48:59 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:49:02 - mmengine - INFO - Epoch(train) [4][150/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:32:50 time: 0.1735 data_time: 0.0153 memory: 4813 grad_norm: 0.0000 loss: 209.0560 loss_cls: 84.9764 loss_bbox: 57.5171 loss_dfl: 66.5625
04/24 01:49:11 - mmengine - INFO - Epoch(train) [4][200/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:32:11 time: 0.1771 data_time: 0.0158 memory: 4493 grad_norm: 0.0000 loss: 208.8431 loss_cls: 84.9746 loss_bbox: 57.3060 loss_dfl: 66.5625
04/24 01:49:20 - mmengine - INFO - Epoch(train) [4][250/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:31:45 time: 0.1847 data_time: 0.0158 memory: 4507 grad_norm: 0.0000 loss: 208.0752 loss_cls: 84.9316 loss_bbox: 56.5811 loss_dfl: 66.5625
04/24 01:49:27 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:49:38 - mmengine - INFO - Epoch(train) [5][ 50/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:31:28 time: 0.2187 data_time: 0.0511 memory: 4466 grad_norm: 0.0000 loss: 208.7696 loss_cls: 85.0308 loss_bbox: 57.1764 loss_dfl: 66.5625
04/24 01:49:47 - mmengine - INFO - Epoch(train) [5][100/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:30:50 time: 0.1719 data_time: 0.0072 memory: 4613 grad_norm: 0.0000 loss: 208.6188 loss_cls: 84.9198 loss_bbox: 57.1365 loss_dfl: 66.5625
04/24 01:49:57 - mmengine - INFO - Epoch(train) [5][150/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:30:52 time: 0.2073 data_time: 0.0197 memory: 4840 grad_norm: 0.0000 loss: 209.2043 loss_cls: 85.1290 loss_bbox: 57.5128 loss_dfl: 66.5625
04/24 01:50:06 - mmengine - INFO - Epoch(train) [5][200/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:30:23 time: 0.1782 data_time: 0.0112 memory: 4746 grad_norm: 0.0000 loss: 208.6433 loss_cls: 84.9482 loss_bbox: 57.1325 loss_dfl: 66.5625
04/24 01:50:15 - mmengine - INFO - Epoch(train) [5][250/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:29:50 time: 0.1720 data_time: 0.0104 memory: 4480 grad_norm: 0.0000 loss: 209.0454 loss_cls: 85.0755 loss_bbox: 57.4073 loss_dfl: 66.5625
04/24 01:50:22 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:50:22 - mmengine - INFO - Saving checkpoint at 5 epochs
04/24 01:50:24 - mmengine - WARNING - `save_param_scheduler` is True but `self.param_schedulers` is None, so skip saving parameter schedulers
04/24 01:50:29 - mmengine - INFO - Epoch(val) [5][ 50/1160] eta: 0:01:21 time: 0.0738 data_time: 0.0058 memory: 4533
04/24 01:50:30 - mmengine - INFO - Epoch(val) [5][ 100/1160] eta: 0:00:56 time: 0.0323 data_time: 0.0016 memory: 596
04/24 01:50:32 - mmengine - INFO - Epoch(val) [5][ 150/1160] eta: 0:00:45 time: 0.0303 data_time: 0.0006 memory: 596
04/24 01:50:33 - mmengine - INFO - Epoch(val) [5][ 200/1160] eta: 0:00:39 time: 0.0281 data_time: 0.0005 memory: 596
04/24 01:50:35 - mmengine - INFO - Epoch(val) [5][ 250/1160] eta: 0:00:34 time: 0.0263 data_time: 0.0012 memory: 596
04/24 01:50:36 - mmengine - INFO - Epoch(val) [5][ 300/1160] eta: 0:00:32 time: 0.0369 data_time: 0.0007 memory: 596
04/24 01:50:38 - mmengine - INFO - Epoch(val) [5][ 350/1160] eta: 0:00:29 time: 0.0250 data_time: 0.0005 memory: 596
04/24 01:50:39 - mmengine - INFO - Epoch(val) [5][ 400/1160] eta: 0:00:26 time: 0.0295 data_time: 0.0009 memory: 596
04/24 01:50:41 - mmengine - INFO - Epoch(val) [5][ 450/1160] eta: 0:00:24 time: 0.0288 data_time: 0.0006 memory: 596
04/24 01:50:42 - mmengine - INFO - Epoch(val) [5][ 500/1160] eta: 0:00:22 time: 0.0240 data_time: 0.0009 memory: 596
04/24 01:50:44 - mmengine - INFO - Epoch(val) [5][ 550/1160] eta: 0:00:20 time: 0.0361 data_time: 0.0011 memory: 596
04/24 01:50:45 - mmengine - INFO - Epoch(val) [5][ 600/1160] eta: 0:00:18 time: 0.0281 data_time: 0.0009 memory: 596
04/24 01:50:47 - mmengine - INFO - Epoch(val) [5][ 650/1160] eta: 0:00:16 time: 0.0318 data_time: 0.0012 memory: 596
04/24 01:50:49 - mmengine - INFO - Epoch(val) [5][ 700/1160] eta: 0:00:15 time: 0.0380 data_time: 0.0012 memory: 596
04/24 01:50:50 - mmengine - INFO - Epoch(val) [5][ 750/1160] eta: 0:00:13 time: 0.0320 data_time: 0.0007 memory: 596
04/24 01:50:52 - mmengine - INFO - Epoch(val) [5][ 800/1160] eta: 0:00:11 time: 0.0265 data_time: 0.0008 memory: 596
04/24 01:50:53 - mmengine - INFO - Epoch(val) [5][ 850/1160] eta: 0:00:10 time: 0.0225 data_time: 0.0012 memory: 596
04/24 01:50:54 - mmengine - INFO - Epoch(val) [5][ 900/1160] eta: 0:00:08 time: 0.0330 data_time: 0.0015 memory: 596
04/24 01:50:56 - mmengine - INFO - Epoch(val) [5][ 950/1160] eta: 0:00:06 time: 0.0290 data_time: 0.0009 memory: 596
04/24 01:50:58 - mmengine - INFO - Epoch(val) [5][1000/1160] eta: 0:00:05 time: 0.0367 data_time: 0.0016 memory: 596
04/24 01:50:59 - mmengine - INFO - Epoch(val) [5][1050/1160] eta: 0:00:03 time: 0.0285 data_time: 0.0007 memory: 596
04/24 01:51:00 - mmengine - INFO - Epoch(val) [5][1100/1160] eta: 0:00:01 time: 0.0283 data_time: 0.0008 memory: 596
04/24 01:51:02 - mmengine - INFO - Epoch(val) [5][1150/1160] eta: 0:00:00 time: 0.0357 data_time: 0.0015 memory: 596
04/24 01:51:02 - mmengine - INFO - Evaluating bbox...
Loading and preparing results...
04/24 01:51:02 - mmengine - ERROR - /opt/conda/lib/python3.10/site-packages/mmdet/evaluation/metrics/coco_metric.py - compute_metrics - 469 - The testing results of the whole dataset is empty.
04/24 01:51:02 - mmengine - INFO - Epoch(val) [5][1160/1160] data_time: 0.0012 time: 0.0321
04/24 01:51:02 - mmengine - WARNING - Since `metrics` is an empty dict, the behavior to save the best checkpoint will be skipped in this evaluation.
04/24 01:51:15 - mmengine - INFO - Epoch(train) [6][ 50/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:30:04 time: 0.2459 data_time: 0.0610 memory: 4564 grad_norm: 0.0000 loss: 208.6450 loss_cls: 85.1488 loss_bbox: 56.9337 loss_dfl: 66.5625
04/24 01:51:24 - mmengine - INFO - Epoch(train) [6][100/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:29:36 time: 0.1752 data_time: 0.0077 memory: 4577 grad_norm: 0.0000 loss: 208.5121 loss_cls: 84.7862 loss_bbox: 57.1634 loss_dfl: 66.5625
04/24 01:51:34 - mmengine - INFO - Epoch(train) [6][150/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:29:31 time: 0.2024 data_time: 0.0244 memory: 4430 grad_norm: 0.0000 loss: 209.0030 loss_cls: 85.0959 loss_bbox: 57.3446 loss_dfl: 66.5625
04/24 01:51:42 - mmengine - INFO - Epoch(train) [6][200/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:29:05 time: 0.1756 data_time: 0.0224 memory: 4671 grad_norm: 0.0000 loss: 208.4982 loss_cls: 84.8802 loss_bbox: 57.0555 loss_dfl: 66.5625
04/24 01:51:51 - mmengine - INFO - Epoch(train) [6][250/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:28:40 time: 0.1770 data_time: 0.0078 memory: 4416 grad_norm: 0.0000 loss: 208.7319 loss_cls: 85.0956 loss_bbox: 57.0738 loss_dfl: 66.5625
04/24 01:51:59 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:52:11 - mmengine - INFO - Epoch(train) [7][ 50/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:28:51 time: 0.2433 data_time: 0.0595 memory: 4643 grad_norm: 0.0000 loss: 209.1036 loss_cls: 85.1163 loss_bbox: 57.4247 loss_dfl: 66.5625
04/24 01:52:19 - mmengine - INFO - Epoch(train) [7][100/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:28:24 time: 0.1728 data_time: 0.0014 memory: 4443 grad_norm: 0.0000 loss: 208.8363 loss_cls: 84.8555 loss_bbox: 57.4183 loss_dfl: 66.5625
04/24 01:52:28 - mmengine - INFO - Epoch(train) [7][150/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:28:01 time: 0.1766 data_time: 0.0150 memory: 4643 grad_norm: 0.0000 loss: 209.2883 loss_cls: 85.2692 loss_bbox: 57.4565 loss_dfl: 66.5625
04/24 01:52:37 - mmengine - INFO - Epoch(train) [7][200/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:27:38 time: 0.1756 data_time: 0.0093 memory: 4590 grad_norm: 0.0000 loss: 209.3806 loss_cls: 85.1305 loss_bbox: 57.6876 loss_dfl: 66.5625
04/24 01:52:46 - mmengine - INFO - Epoch(train) [7][250/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:27:23 time: 0.1872 data_time: 0.0144 memory: 4656 grad_norm: 0.0000 loss: 209.1152 loss_cls: 84.9242 loss_bbox: 57.6285 loss_dfl: 66.5625
04/24 01:52:49 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:52:54 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:53:05 - mmengine - INFO - Epoch(train) [8][ 50/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:27:13 time: 0.2192 data_time: 0.0511 memory: 4630 grad_norm: 0.0000 loss: 209.1739 loss_cls: 85.0660 loss_bbox: 57.5453 loss_dfl: 66.5625
04/24 01:53:15 - mmengine - INFO - Epoch(train) [8][100/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:27:07 time: 0.2008 data_time: 0.0103 memory: 4443 grad_norm: 0.0000 loss: 208.7517 loss_cls: 85.0137 loss_bbox: 57.1755 loss_dfl: 66.5625
04/24 01:53:23 - mmengine - INFO - Epoch(train) [8][150/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:26:44 time: 0.1729 data_time: 0.0097 memory: 4523 grad_norm: 0.0000 loss: 208.3930 loss_cls: 85.0247 loss_bbox: 56.8058 loss_dfl: 66.5625
04/24 01:53:32 - mmengine - INFO - Epoch(train) [8][200/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:26:22 time: 0.1729 data_time: 0.0050 memory: 4910 grad_norm: 0.0000 loss: 208.6075 loss_cls: 85.0593 loss_bbox: 56.9856 loss_dfl: 66.5625
04/24 01:53:41 - mmengine - INFO - Epoch(train) [8][250/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:26:00 time: 0.1724 data_time: 0.0069 memory: 4603 grad_norm: 0.0000 loss: 208.7036 loss_cls: 85.0302 loss_bbox: 57.1108 loss_dfl: 66.5625
04/24 01:53:48 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:54:00 - mmengine - INFO - Epoch(train) [9][ 50/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:26:08 time: 0.2425 data_time: 0.0541 memory: 4870 grad_norm: 0.0000 loss: 208.8394 loss_cls: 85.1671 loss_bbox: 57.1098 loss_dfl: 66.5625
04/24 01:54:09 - mmengine - INFO - Epoch(train) [9][100/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:25:47 time: 0.1731 data_time: 0.0069 memory: 4577 grad_norm: 0.0000 loss: 208.5417 loss_cls: 84.9505 loss_bbox: 57.0286 loss_dfl: 66.5625
04/24 01:54:18 - mmengine - INFO - Epoch(train) [9][150/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:25:27 time: 0.1738 data_time: 0.0166 memory: 4510 grad_norm: 0.0000 loss: 209.0994 loss_cls: 85.0374 loss_bbox: 57.4995 loss_dfl: 66.5625
04/24 01:54:27 - mmengine - INFO - Epoch(train) [9][200/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:25:08 time: 0.1765 data_time: 0.0088 memory: 4550 grad_norm: 0.0000 loss: 208.5768 loss_cls: 85.0928 loss_bbox: 56.9215 loss_dfl: 66.5625
04/24 01:54:36 - mmengine - INFO - Epoch(train) [9][250/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:25:01 time: 0.1972 data_time: 0.0190 memory: 4403 grad_norm: 0.0000 loss: 208.5594 loss_cls: 85.0345 loss_bbox: 56.9624 loss_dfl: 66.5625
04/24 01:54:44 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:54:56 - mmengine - INFO - Epoch(train) [10][ 50/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:25:07 time: 0.2423 data_time: 0.0514 memory: 4991 grad_norm: 0.0000 loss: 208.5929 loss_cls: 85.0034 loss_bbox: 57.0269 loss_dfl: 66.5625
04/24 01:55:06 - mmengine - INFO - Epoch(train) [10][100/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:24:53 time: 0.1849 data_time: 0.0157 memory: 4443 grad_norm: 0.0000 loss: 209.1813 loss_cls: 85.0113 loss_bbox: 57.6075 loss_dfl: 66.5625
04/24 01:55:15 - mmengine - INFO - Epoch(train) [10][150/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:24:39 time: 0.1852 data_time: 0.0119 memory: 4630 grad_norm: 0.0000 loss: 209.0244 loss_cls: 85.0950 loss_bbox: 57.3670 loss_dfl: 66.5625
04/24 01:55:27 - mmengine - INFO - Epoch(train) [10][200/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:24:55 time: 0.2496 data_time: 0.0425 memory: 4991 grad_norm: 0.0000 loss: 208.7363 loss_cls: 84.8396 loss_bbox: 57.3342 loss_dfl: 66.5625
04/24 01:55:39 - mmengine - INFO - Epoch(train) [10][250/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:25:01 time: 0.2273 data_time: 0.0430 memory: 4603 grad_norm: 0.0000 loss: 208.3694 loss_cls: 84.9797 loss_bbox: 56.8273 loss_dfl: 66.5625
04/24 01:55:48 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520
04/24 01:55:48 - mmengine - INFO - Saving checkpoint at 10 epochs
04/24 01:55:55 - mmengine - INFO - Epoch(val) [10][ 50/1160] eta: 0:01:06 time: 0.0599 data_time: 0.0112 memory: 4656
04/24 01:55:58 - mmengine - INFO - Epoch(val) [10][ 100/1160] eta: 0:01:02 time: 0.0582 data_time: 0.0052 memory: 597
04/24 01:56:00 - mmengine - INFO - Epoch(val) [10][ 150/1160] eta: 0:00:54 time: 0.0449 data_time: 0.0015 memory: 597
04/24 01:56:02 - mmengine - INFO - Epoch(val) [10][ 200/1160] eta: 0:00:51 time: 0.0503 data_time: 0.0019 memory: 597
04/24 01:56:04 - mmengine - INFO - Epoch(val) [10][ 250/1160] eta: 0:00:45 time: 0.0367 data_time: 0.0011 memory: 597
04/24 01:56:07 - mmengine - INFO - Epoch(val) [10][ 300/1160] eta: 0:00:43 time: 0.0503 data_time: 0.0016 memory: 597
04/24 01:56:09 - mmengine - INFO - Epoch(val) [10][ 350/1160] eta: 0:00:38 time: 0.0362 data_time: 0.0013 memory: 597
04/24 01:56:11 - mmengine - INFO - Epoch(val) [10][ 400/1160] eta: 0:00:36 time: 0.0439 data_time: 0.0015 memory: 597
04/24 01:56:13 - mmengine - INFO - Epoch(val) [10][ 450/1160] eta: 0:00:33 time: 0.0496 data_time: 0.0020 memory: 597
04/24 01:56:16 - mmengine - INFO - Epoch(val) [10][ 500/1160] eta: 0:00:31 time: 0.0478 data_time: 0.0027 memory: 597
04/24 01:56:17 - mmengine - INFO - Epoch(val) [10][ 550/1160] eta: 0:00:28 time: 0.0350 data_time: 0.0013 memory: 597
04/24 01:56:20 - mmengine - INFO - Epoch(val) [10][ 600/1160] eta: 0:00:25 time: 0.0424 data_time: 0.0014 memory: 597
04/24 01:56:22 - mmengine - INFO - Epoch(val) [10][ 650/1160] eta: 0:00:23 time: 0.0520 data_time: 0.0028 memory: 597
04/24 01:56:25 - mmengine - INFO - Epoch(val) [10][ 700/1160] eta: 0:00:21 time: 0.0473 data_time: 0.0032 memory: 597
04/24 01:56:26 - mmengine - INFO - Epoch(val) [10][ 750/1160] eta: 0:00:18 time: 0.0333 data_time: 0.0008 memory: 597
04/24 01:56:29 - mmengine - INFO - Epoch(val) [10][ 800/1160] eta: 0:00:16 time: 0.0478 data_time: 0.0029 memory: 597
04/24 01:56:31 - mmengine - INFO - Epoch(val) [10][ 850/1160] eta: 0:00:14 time: 0.0504 data_time: 0.0018 memory: 597
04/24 01:56:33 - mmengine - INFO - Epoch(val) [10][ 900/1160] eta: 0:00:11 time: 0.0408 data_time: 0.0008 memory: 597
04/24 01:56:35 - mmengine - INFO - Epoch(val) [10][ 950/1160] eta: 0:00:09 time: 0.0361 data_time: 0.0014 memory: 597
04/24 01:56:37 - mmengine - INFO - Epoch(val) [10][1000/1160] eta: 0:00:07 time: 0.0407 data_time: 0.0009 memory: 597
04/24 01:56:40 - mmengine - INFO - Epoch(val) [10][1050/1160] eta: 0:00:05 time: 0.0546 data_time: 0.0041 memory: 597
04/24 01:56:42 - mmengine - INFO - Epoch(val) [10][1100/1160] eta: 0:00:02 time: 0.0513 data_time: 0.0018 memory: 597
04/24 01:56:44 - mmengine - INFO - Epoch(val) [10][1150/1160] eta: 0:00:00 time: 0.0306 data_time: 0.0010 memory: 597
04/24 01:56:44 - mmengine - INFO - Evaluating bbox...
Loading and preparing results...
04/24 01:56:44 - mmengine - ERROR - /opt/conda/lib/python3.10/site-packages/mmdet/evaluation/metrics/coco_metric.py - compute_metrics - 469 - The testing results of the whole dataset is empty.
04/24 01:56:44 - mmengine - INFO - Epoch(val) [10][1160/1160] data_time: 0.0023 time: 0.0450
I find that adding "load_from" to the config file fixes the problem above. But I'm not sure which pretrained model file to use. Should it be load_from = 'pretrained_models/yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth'
or load_from = 'pretrained_models/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_cc3mlite_train_pretrained-7a5eea3b.pth'
?
@Hstwhale Hello, can you use the weights and the corresponding prompt you have trained to predict the specified target?
@zhujiajian98 Using the model trained from 'pretrained_models/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_cc3mlite_train_pretrained-7a5eea3b.pth'
, I can predict the custom target, but the performance is much worse than fine-tuning directly.
@lin-whale Hello, what I'm wondering is whether your trained model can make normal predictions, or whether it just produces random predictions. What I suspect now is that there might be a problem with the evaluation function that prevents the evaluation from being completed.
@zhujiajian98 After setting load_from = 'pretrained_models/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_cc3mlite_train_pretrained-7a5eea3b.pth'
, I have solved the problem of the loss not decreasing during training, and the predictions are informative now. It's just not as good as fine-tuning directly, which is about 70% AP while the other is about 90% AP.
@lin-whale Do you mean that the yolo-world trained by prompt-tuning will be lower?
@lin-whale Do you mean that the yolo-world trained by prompt-tuning will be lower?
Yes
The training output:
] use_mask2refine = True val_ann_file = 'test/2024-03-28-ann-cvat/annotations/annotations.json' val_batch_size_per_gpu = 1 val_cfg = dict(type='ValLoop') val_data_prefix = 'test/2024-03-28-ann-cvat/images/' val_dataloader = dict( batch_size=1, dataset=dict( ann_file= '/data/cvat/train/2024-04-02-ann-cvat/annotations/annotations.json.test', batch_shapes_cfg=None, data_prefix=dict(img='images/'), data_root='/data/cvat/train/2024-04-02-ann-cvat', filter_cfg=dict(filter_empty_gt=False, min_size=32), metainfo=dict( classes=( 'floor', 'person', 'paper', 'bottle', 'paper cup', 'mask', 'thread', 'waiting bench', 'sturdy', 'plastic bag', 'table', 'packaging bag', 'door', 'carton box', 'sticker', 'screen', 'book', 'cotton ball', 'warning sign', 'rod', 'poster rack', 'vomit', 'blood', 'traffic cone', 'trash can', 'cart', 'rack', 'bag', 'flowerpot', 'medication', 'paper box', 'meal box', 'pericarp', 'hat', 'umbrella', 'drip stand', 'coffee stains', 'elevator entrance', 'escalator entrance', 'triage desk', 'registration machine', 'fire hydrant', 'hospital bed', 'milk stains', 'plinth', 'chair', 'wheel chair', 'swab', 'drinking cup', 'fallen leaves', )), pipeline=[ dict(backend_args=None, type='LoadImageFromFile'), dict(scale=( 640, 640, ), type='YOLOv5KeepRatioResize'), dict( allow_scale_up=False, pad_val=dict(img=114), scale=( 640, 640, ), type='LetterResize'), dict(_scope_='mmdet', type='LoadAnnotations', with_bbox=True), dict( meta_keys=( 'img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param', ), type='mmdet.PackDetInputs'), ], test_mode=True, type='YOLOv5CocoDataset'), drop_last=False, num_workers=2, persistent_workers=True, pin_memory=True, sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict( ann_file= '/data/cvat/train/2024-04-02-ann-cvat/annotations/annotations.json.test', classwise=True, metric='bbox', proposal_nums=( 100, 1, 10, ), type='mmdet.CocoMetric') val_interval_stage2 = 1 val_num_workers = 2 vis_backends = [ 
dict(type='LocalVisBackend'), ] visualizer = dict( name='visualizer', type='mmdet.DetLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), ]) weight_decay = 0.05 widen_factor = 1.0 work_dir = './work_dirs/yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco' 04/24 01:45:30 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used. 04/24 01:45:30 - mmengine - INFO - Hooks will be executed in the following order: before_run: (VERY_HIGH ) RuntimeInfoHook (49 ) EMAHook (BELOW_NORMAL) LoggerHook -------------------- after_load_checkpoint: (49 ) EMAHook -------------------- before_train: (9 ) YOLOv5ParamSchedulerHook (VERY_HIGH ) RuntimeInfoHook (49 ) EMAHook (NORMAL ) IterTimerHook (VERY_LOW ) CheckpointHook -------------------- before_train_epoch: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) DistSamplerSeedHook (NORMAL ) PipelineSwitchHook -------------------- before_train_iter: (9 ) YOLOv5ParamSchedulerHook (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook -------------------- after_train_iter: (9 ) YOLOv5ParamSchedulerHook (VERY_HIGH ) RuntimeInfoHook (49 ) EMAHook (NORMAL ) IterTimerHook (BELOW_NORMAL) LoggerHook (VERY_LOW ) CheckpointHook -------------------- after_train_epoch: (9 ) YOLOv5ParamSchedulerHook (NORMAL ) IterTimerHook (VERY_LOW ) CheckpointHook -------------------- before_val: (VERY_HIGH ) RuntimeInfoHook -------------------- before_val_epoch: (49 ) EMAHook (NORMAL ) IterTimerHook -------------------- before_val_iter: (NORMAL ) IterTimerHook -------------------- after_val_iter: (NORMAL ) IterTimerHook (NORMAL ) DetVisualizationHook (BELOW_NORMAL) LoggerHook -------------------- after_val_epoch: (9 ) YOLOv5ParamSchedulerHook (VERY_HIGH ) RuntimeInfoHook (49 ) EMAHook (NORMAL ) IterTimerHook (BELOW_NORMAL) LoggerHook (VERY_LOW ) CheckpointHook -------------------- after_val: (VERY_HIGH ) 
RuntimeInfoHook -------------------- before_save_checkpoint: (49 ) EMAHook -------------------- after_train: (VERY_HIGH ) RuntimeInfoHook (VERY_LOW ) CheckpointHook -------------------- before_test: (VERY_HIGH ) RuntimeInfoHook -------------------- before_test_epoch: (49 ) EMAHook (NORMAL ) IterTimerHook -------------------- before_test_iter: (NORMAL ) IterTimerHook -------------------- after_test_iter: (NORMAL ) IterTimerHook (NORMAL ) DetVisualizationHook (BELOW_NORMAL) LoggerHook -------------------- after_test_epoch: (VERY_HIGH ) RuntimeInfoHook (49 ) EMAHook (NORMAL ) IterTimerHook (BELOW_NORMAL) LoggerHook -------------------- after_test: (VERY_HIGH ) RuntimeInfoHook -------------------- after_run: (BELOW_NORMAL) LoggerHook -------------------- loading annotations into memory... Done (t=1.32s) creating index... index created! 04/24 01:45:34 - mmengine - INFO - paramwise_options -- embeddings:lr=0.002 04/24 01:45:34 - mmengine - INFO - paramwise_options -- embeddings:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.main_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.main_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.final_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.final_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.0.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.0.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.0.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.0.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options 
-- neck.top_down_layers.0.blocks.1.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.1.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.1.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.1.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.2.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.2.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.2.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.blocks.2.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.attn_block.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.attn_block.guide_fc.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.attn_block.project_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.0.attn_block.project_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.main_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.main_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.final_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.final_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.0.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - 
mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.0.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.0.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.0.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.1.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.1.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.1.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.1.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.2.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.2.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.2.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.blocks.2.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.attn_block.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.attn_block.guide_fc.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.attn_block.project_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.top_down_layers.1.attn_block.project_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.downsample_layers.0.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- 
neck.downsample_layers.0.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.downsample_layers.1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.downsample_layers.1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.main_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.main_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.final_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.final_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.0.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.0.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.0.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.0.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.1.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.1.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.1.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.1.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.2.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.2.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- 
neck.bottom_up_layers.0.blocks.2.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.blocks.2.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.attn_block.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.attn_block.guide_fc.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.attn_block.project_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.0.attn_block.project_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.main_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.main_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.final_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.final_conv.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.0.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.0.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.0.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.0.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.1.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.1.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.1.conv2.bn.weight:weight_decay=0.0 
04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.1.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.2.conv1.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.2.conv1.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.2.conv2.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.blocks.2.conv2.bn.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.attn_block.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.attn_block.guide_fc.bias:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.attn_block.project_conv.bn.weight:weight_decay=0.0 04/24 01:45:34 - mmengine - INFO - paramwise_options -- neck.bottom_up_layers.1.attn_block.project_conv.bn.bias:weight_decay=0.0 loading annotations into memory... Done (t=0.25s) creating index... index created! loading annotations into memory... Done (t=0.20s) creating index... index created! 04/24 01:45:39 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io 04/24 01:45:39 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future. 04/24 01:45:39 - mmengine - INFO - Checkpoints will be saved to /home/aistar/yolo-world/YOLO-World/work_dirs/yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco. /opt/conda/lib/python3.10/site-packages/torch/functional.py:507: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. 
(Triggered internally at /opt/conda/conda-bld/pytorch_1711403380909/work/aten/src/ATen/native/TensorShape.cpp:3549.) return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined] 04/24 01:45:59 - mmengine - INFO - Epoch(train) [1][ 50/290] base_lr: 2.0000e-03 lr: 9.8000e-05 eta: 3:15:16 time: 0.4047 data_time: 0.0721 memory: 10524 grad_norm: 0.0000 loss: 209.2231 loss_cls: 85.0021 loss_bbox: 57.6584 loss_dfl: 66.5625 04/24 01:46:11 - mmengine - INFO - Epoch(train) [1][100/290] base_lr: 2.0000e-03 lr: 1.9800e-04 eta: 2:36:04 time: 0.2433 data_time: 0.0477 memory: 4760 grad_norm: 0.0000 loss: 208.6729 loss_cls: 84.9814 loss_bbox: 57.1290 loss_dfl: 66.5625 04/24 01:46:20 - mmengine - INFO - Epoch(train) [1][150/290] base_lr: 2.0000e-03 lr: 2.9800e-04 eta: 2:12:05 time: 0.1761 data_time: 0.0129 memory: 4706 grad_norm: 0.0000 loss: 208.2608 loss_cls: 84.9480 loss_bbox: 56.7503 loss_dfl: 66.5625 04/24 01:46:29 - mmengine - INFO - Epoch(train) [1][200/290] base_lr: 2.0000e-03 lr: 3.9800e-04 eta: 2:00:17 time: 0.1783 data_time: 0.0173 memory: 4880 grad_norm: 0.0000 loss: 208.5585 loss_cls: 84.8515 loss_bbox: 57.1445 loss_dfl: 66.5625 04/24 01:46:37 - mmengine - INFO - Epoch(train) [1][250/290] base_lr: 2.0000e-03 lr: 4.9800e-04 eta: 1:52:12 time: 0.1685 data_time: 0.0032 memory: 4640 grad_norm: 0.0000 loss: 208.4256 loss_cls: 84.9207 loss_bbox: 56.9424 loss_dfl: 66.5625 04/24 01:46:45 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:46:57 - mmengine - INFO - Epoch(train) [2][ 50/290] base_lr: 2.0000e-03 lr: 6.7129e-04 eta: 1:48:58 time: 0.2320 data_time: 0.0606 memory: 4920 grad_norm: 0.0000 loss: 208.7070 loss_cls: 85.1418 loss_bbox: 57.0027 loss_dfl: 66.5625 04/24 01:47:05 - mmengine - INFO - Epoch(train) [2][100/290] base_lr: 2.0000e-03 lr: 7.7030e-04 eta: 1:45:39 time: 0.1772 data_time: 0.0228 memory: 4640 grad_norm: 0.0000 loss: 209.1946 loss_cls: 85.3240 loss_bbox: 57.3081 loss_dfl: 
66.5625 04/24 01:47:14 - mmengine - INFO - Epoch(train) [2][150/290] base_lr: 2.0000e-03 lr: 8.6931e-04 eta: 1:43:13 time: 0.1798 data_time: 0.0082 memory: 4706 grad_norm: 0.0000 loss: 208.6277 loss_cls: 85.0666 loss_bbox: 56.9986 loss_dfl: 66.5625 04/24 01:47:23 - mmengine - INFO - Epoch(train) [2][200/290] base_lr: 2.0000e-03 lr: 9.6832e-04 eta: 1:40:57 time: 0.1738 data_time: 0.0115 memory: 4560 grad_norm: 0.0000 loss: 209.1859 loss_cls: 85.0124 loss_bbox: 57.6110 loss_dfl: 66.5625 04/24 01:47:32 - mmengine - INFO - Epoch(train) [2][250/290] base_lr: 2.0000e-03 lr: 1.0673e-03 eta: 1:39:16 time: 0.1781 data_time: 0.0104 memory: 4520 grad_norm: 0.0000 loss: 208.7904 loss_cls: 85.1610 loss_bbox: 57.0668 loss_dfl: 66.5625 04/24 01:47:39 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:47:51 - mmengine - INFO - Epoch(train) [3][ 50/290] base_lr: 2.0000e-03 lr: 1.2331e-03 eta: 1:39:06 time: 0.2327 data_time: 0.0540 memory: 4760 grad_norm: 0.0000 loss: 209.1261 loss_cls: 85.0435 loss_bbox: 57.5202 loss_dfl: 66.5625 04/24 01:48:00 - mmengine - INFO - Epoch(train) [3][100/290] base_lr: 2.0000e-03 lr: 1.3311e-03 eta: 1:37:54 time: 0.1798 data_time: 0.0118 memory: 4560 grad_norm: 0.0000 loss: 208.6577 loss_cls: 84.9895 loss_bbox: 57.1057 loss_dfl: 66.5625 04/24 01:48:09 - mmengine - INFO - Epoch(train) [3][150/290] base_lr: 2.0000e-03 lr: 1.4291e-03 eta: 1:36:45 time: 0.1771 data_time: 0.0109 memory: 4813 grad_norm: 0.0000 loss: 208.6249 loss_cls: 84.9350 loss_bbox: 57.1273 loss_dfl: 66.5625 04/24 01:48:18 - mmengine - INFO - Epoch(train) [3][200/290] base_lr: 2.0000e-03 lr: 1.5272e-03 eta: 1:35:39 time: 0.1746 data_time: 0.0081 memory: 4507 grad_norm: 0.0000 loss: 208.9671 loss_cls: 85.0744 loss_bbox: 57.3302 loss_dfl: 66.5625 04/24 01:48:26 - mmengine - INFO - Epoch(train) [3][250/290] base_lr: 2.0000e-03 lr: 1.6252e-03 eta: 1:34:13 time: 0.1587 data_time: 0.0127 memory: 4666 grad_norm: 0.0000 
loss: 208.3120 loss_cls: 84.8422 loss_bbox: 56.9073 loss_dfl: 66.5625 04/24 01:48:34 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:48:45 - mmengine - INFO - Epoch(train) [4][ 50/290] base_lr: 2.0000e-03 lr: 1.7834e-03 eta: 1:34:26 time: 0.2219 data_time: 0.0478 memory: 4733 grad_norm: 0.0000 loss: 208.5537 loss_cls: 84.9261 loss_bbox: 57.0651 loss_dfl: 66.5625 04/24 01:48:54 - mmengine - INFO - Epoch(train) [4][100/290] base_lr: 2.0000e-03 lr: 1.8804e-03 eta: 1:33:37 time: 0.1745 data_time: 0.0155 memory: 4666 grad_norm: 0.0000 loss: 208.6375 loss_cls: 85.1082 loss_bbox: 56.9668 loss_dfl: 66.5625 04/24 01:48:59 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:49:02 - mmengine - INFO - Epoch(train) [4][150/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:32:50 time: 0.1735 data_time: 0.0153 memory: 4813 grad_norm: 0.0000 loss: 209.0560 loss_cls: 84.9764 loss_bbox: 57.5171 loss_dfl: 66.5625 04/24 01:49:11 - mmengine - INFO - Epoch(train) [4][200/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:32:11 time: 0.1771 data_time: 0.0158 memory: 4493 grad_norm: 0.0000 loss: 208.8431 loss_cls: 84.9746 loss_bbox: 57.3060 loss_dfl: 66.5625 04/24 01:49:20 - mmengine - INFO - Epoch(train) [4][250/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:31:45 time: 0.1847 data_time: 0.0158 memory: 4507 grad_norm: 0.0000 loss: 208.0752 loss_cls: 84.9316 loss_bbox: 56.5811 loss_dfl: 66.5625 04/24 01:49:27 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:49:38 - mmengine - INFO - Epoch(train) [5][ 50/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:31:28 time: 0.2187 data_time: 0.0511 memory: 4466 grad_norm: 0.0000 loss: 208.7696 loss_cls: 85.0308 loss_bbox: 57.1764 loss_dfl: 66.5625 04/24 01:49:47 - mmengine - INFO - Epoch(train) [5][100/290] base_lr: 2.0000e-03 lr: 
1.9406e-03 eta: 1:30:50 time: 0.1719 data_time: 0.0072 memory: 4613 grad_norm: 0.0000 loss: 208.6188 loss_cls: 84.9198 loss_bbox: 57.1365 loss_dfl: 66.5625 04/24 01:49:57 - mmengine - INFO - Epoch(train) [5][150/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:30:52 time: 0.2073 data_time: 0.0197 memory: 4840 grad_norm: 0.0000 loss: 209.2043 loss_cls: 85.1290 loss_bbox: 57.5128 loss_dfl: 66.5625 04/24 01:50:06 - mmengine - INFO - Epoch(train) [5][200/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:30:23 time: 0.1782 data_time: 0.0112 memory: 4746 grad_norm: 0.0000 loss: 208.6433 loss_cls: 84.9482 loss_bbox: 57.1325 loss_dfl: 66.5625 04/24 01:50:15 - mmengine - INFO - Epoch(train) [5][250/290] base_lr: 2.0000e-03 lr: 1.9406e-03 eta: 1:29:50 time: 0.1720 data_time: 0.0104 memory: 4480 grad_norm: 0.0000 loss: 209.0454 loss_cls: 85.0755 loss_bbox: 57.4073 loss_dfl: 66.5625 04/24 01:50:22 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:50:22 - mmengine - INFO - Saving checkpoint at 5 epochs 04/24 01:50:24 - mmengine - WARNING - `save_param_scheduler` is True but `self.param_schedulers` is None, so skip saving parameter schedulers 04/24 01:50:29 - mmengine - INFO - Epoch(val) [5][ 50/1160] eta: 0:01:21 time: 0.0738 data_time: 0.0058 memory: 4533 04/24 01:50:30 - mmengine - INFO - Epoch(val) [5][ 100/1160] eta: 0:00:56 time: 0.0323 data_time: 0.0016 memory: 596 04/24 01:50:32 - mmengine - INFO - Epoch(val) [5][ 150/1160] eta: 0:00:45 time: 0.0303 data_time: 0.0006 memory: 596 04/24 01:50:33 - mmengine - INFO - Epoch(val) [5][ 200/1160] eta: 0:00:39 time: 0.0281 data_time: 0.0005 memory: 596 04/24 01:50:35 - mmengine - INFO - Epoch(val) [5][ 250/1160] eta: 0:00:34 time: 0.0263 data_time: 0.0012 memory: 596 04/24 01:50:36 - mmengine - INFO - Epoch(val) [5][ 300/1160] eta: 0:00:32 time: 0.0369 data_time: 0.0007 memory: 596 04/24 01:50:38 - mmengine - INFO - Epoch(val) [5][ 350/1160] eta: 0:00:29 time: 
0.0250 data_time: 0.0005 memory: 596 04/24 01:50:39 - mmengine - INFO - Epoch(val) [5][ 400/1160] eta: 0:00:26 time: 0.0295 data_time: 0.0009 memory: 596 04/24 01:50:41 - mmengine - INFO - Epoch(val) [5][ 450/1160] eta: 0:00:24 time: 0.0288 data_time: 0.0006 memory: 596 04/24 01:50:42 - mmengine - INFO - Epoch(val) [5][ 500/1160] eta: 0:00:22 time: 0.0240 data_time: 0.0009 memory: 596 04/24 01:50:44 - mmengine - INFO - Epoch(val) [5][ 550/1160] eta: 0:00:20 time: 0.0361 data_time: 0.0011 memory: 596 04/24 01:50:45 - mmengine - INFO - Epoch(val) [5][ 600/1160] eta: 0:00:18 time: 0.0281 data_time: 0.0009 memory: 596 04/24 01:50:47 - mmengine - INFO - Epoch(val) [5][ 650/1160] eta: 0:00:16 time: 0.0318 data_time: 0.0012 memory: 596 04/24 01:50:49 - mmengine - INFO - Epoch(val) [5][ 700/1160] eta: 0:00:15 time: 0.0380 data_time: 0.0012 memory: 596 04/24 01:50:50 - mmengine - INFO - Epoch(val) [5][ 750/1160] eta: 0:00:13 time: 0.0320 data_time: 0.0007 memory: 596 04/24 01:50:52 - mmengine - INFO - Epoch(val) [5][ 800/1160] eta: 0:00:11 time: 0.0265 data_time: 0.0008 memory: 596 04/24 01:50:53 - mmengine - INFO - Epoch(val) [5][ 850/1160] eta: 0:00:10 time: 0.0225 data_time: 0.0012 memory: 596 04/24 01:50:54 - mmengine - INFO - Epoch(val) [5][ 900/1160] eta: 0:00:08 time: 0.0330 data_time: 0.0015 memory: 596 04/24 01:50:56 - mmengine - INFO - Epoch(val) [5][ 950/1160] eta: 0:00:06 time: 0.0290 data_time: 0.0009 memory: 596 04/24 01:50:58 - mmengine - INFO - Epoch(val) [5][1000/1160] eta: 0:00:05 time: 0.0367 data_time: 0.0016 memory: 596 04/24 01:50:59 - mmengine - INFO - Epoch(val) [5][1050/1160] eta: 0:00:03 time: 0.0285 data_time: 0.0007 memory: 596 04/24 01:51:00 - mmengine - INFO - Epoch(val) [5][1100/1160] eta: 0:00:01 time: 0.0283 data_time: 0.0008 memory: 596 04/24 01:51:02 - mmengine - INFO - Epoch(val) [5][1150/1160] eta: 0:00:00 time: 0.0357 data_time: 0.0015 memory: 596 04/24 01:51:02 - mmengine - INFO - Evaluating bbox... Loading and preparing results... 
04/24 01:51:02 - mmengine - ERROR - /opt/conda/lib/python3.10/site-packages/mmdet/evaluation/metrics/coco_metric.py - compute_metrics - 469 - The testing results of the whole dataset is empty. 04/24 01:51:02 - mmengine - INFO - Epoch(val) [5][1160/1160] data_time: 0.0012 time: 0.0321 04/24 01:51:02 - mmengine - WARNING - Since `metrics` is an empty dict, the behavior to save the best checkpoint will be skipped in this evaluation. 04/24 01:51:15 - mmengine - INFO - Epoch(train) [6][ 50/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:30:04 time: 0.2459 data_time: 0.0610 memory: 4564 grad_norm: 0.0000 loss: 208.6450 loss_cls: 85.1488 loss_bbox: 56.9337 loss_dfl: 66.5625 04/24 01:51:24 - mmengine - INFO - Epoch(train) [6][100/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:29:36 time: 0.1752 data_time: 0.0077 memory: 4577 grad_norm: 0.0000 loss: 208.5121 loss_cls: 84.7862 loss_bbox: 57.1634 loss_dfl: 66.5625 04/24 01:51:34 - mmengine - INFO - Epoch(train) [6][150/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:29:31 time: 0.2024 data_time: 0.0244 memory: 4430 grad_norm: 0.0000 loss: 209.0030 loss_cls: 85.0959 loss_bbox: 57.3446 loss_dfl: 66.5625 04/24 01:51:42 - mmengine - INFO - Epoch(train) [6][200/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:29:05 time: 0.1756 data_time: 0.0224 memory: 4671 grad_norm: 0.0000 loss: 208.4982 loss_cls: 84.8802 loss_bbox: 57.0555 loss_dfl: 66.5625 04/24 01:51:51 - mmengine - INFO - Epoch(train) [6][250/290] base_lr: 2.0000e-03 lr: 1.9208e-03 eta: 1:28:40 time: 0.1770 data_time: 0.0078 memory: 4416 grad_norm: 0.0000 loss: 208.7319 loss_cls: 85.0956 loss_bbox: 57.0738 loss_dfl: 66.5625 04/24 01:51:59 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:52:11 - mmengine - INFO - Epoch(train) [7][ 50/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:28:51 time: 0.2433 data_time: 0.0595 memory: 4643 grad_norm: 0.0000 loss: 209.1036 loss_cls: 85.1163 loss_bbox: 57.4247 
loss_dfl: 66.5625 04/24 01:52:19 - mmengine - INFO - Epoch(train) [7][100/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:28:24 time: 0.1728 data_time: 0.0014 memory: 4443 grad_norm: 0.0000 loss: 208.8363 loss_cls: 84.8555 loss_bbox: 57.4183 loss_dfl: 66.5625 04/24 01:52:28 - mmengine - INFO - Epoch(train) [7][150/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:28:01 time: 0.1766 data_time: 0.0150 memory: 4643 grad_norm: 0.0000 loss: 209.2883 loss_cls: 85.2692 loss_bbox: 57.4565 loss_dfl: 66.5625 04/24 01:52:37 - mmengine - INFO - Epoch(train) [7][200/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:27:38 time: 0.1756 data_time: 0.0093 memory: 4590 grad_norm: 0.0000 loss: 209.3806 loss_cls: 85.1305 loss_bbox: 57.6876 loss_dfl: 66.5625 04/24 01:52:46 - mmengine - INFO - Epoch(train) [7][250/290] base_lr: 2.0000e-03 lr: 1.9010e-03 eta: 1:27:23 time: 0.1872 data_time: 0.0144 memory: 4656 grad_norm: 0.0000 loss: 209.1152 loss_cls: 84.9242 loss_bbox: 57.6285 loss_dfl: 66.5625 04/24 01:52:49 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:52:54 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:53:05 - mmengine - INFO - Epoch(train) [8][ 50/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:27:13 time: 0.2192 data_time: 0.0511 memory: 4630 grad_norm: 0.0000 loss: 209.1739 loss_cls: 85.0660 loss_bbox: 57.5453 loss_dfl: 66.5625 04/24 01:53:15 - mmengine - INFO - Epoch(train) [8][100/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:27:07 time: 0.2008 data_time: 0.0103 memory: 4443 grad_norm: 0.0000 loss: 208.7517 loss_cls: 85.0137 loss_bbox: 57.1755 loss_dfl: 66.5625 04/24 01:53:23 - mmengine - INFO - Epoch(train) [8][150/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:26:44 time: 0.1729 data_time: 0.0097 memory: 4523 grad_norm: 0.0000 loss: 208.3930 loss_cls: 85.0247 loss_bbox: 56.8058 loss_dfl: 66.5625 04/24 01:53:32 - mmengine - 
INFO - Epoch(train) [8][200/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:26:22 time: 0.1729 data_time: 0.0050 memory: 4910 grad_norm: 0.0000 loss: 208.6075 loss_cls: 85.0593 loss_bbox: 56.9856 loss_dfl: 66.5625 04/24 01:53:41 - mmengine - INFO - Epoch(train) [8][250/290] base_lr: 2.0000e-03 lr: 1.8812e-03 eta: 1:26:00 time: 0.1724 data_time: 0.0069 memory: 4603 grad_norm: 0.0000 loss: 208.7036 loss_cls: 85.0302 loss_bbox: 57.1108 loss_dfl: 66.5625 04/24 01:53:48 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:54:00 - mmengine - INFO - Epoch(train) [9][ 50/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:26:08 time: 0.2425 data_time: 0.0541 memory: 4870 grad_norm: 0.0000 loss: 208.8394 loss_cls: 85.1671 loss_bbox: 57.1098 loss_dfl: 66.5625 04/24 01:54:09 - mmengine - INFO - Epoch(train) [9][100/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:25:47 time: 0.1731 data_time: 0.0069 memory: 4577 grad_norm: 0.0000 loss: 208.5417 loss_cls: 84.9505 loss_bbox: 57.0286 loss_dfl: 66.5625 04/24 01:54:18 - mmengine - INFO - Epoch(train) [9][150/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:25:27 time: 0.1738 data_time: 0.0166 memory: 4510 grad_norm: 0.0000 loss: 209.0994 loss_cls: 85.0374 loss_bbox: 57.4995 loss_dfl: 66.5625 04/24 01:54:27 - mmengine - INFO - Epoch(train) [9][200/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:25:08 time: 0.1765 data_time: 0.0088 memory: 4550 grad_norm: 0.0000 loss: 208.5768 loss_cls: 85.0928 loss_bbox: 56.9215 loss_dfl: 66.5625 04/24 01:54:36 - mmengine - INFO - Epoch(train) [9][250/290] base_lr: 2.0000e-03 lr: 1.8614e-03 eta: 1:25:01 time: 0.1972 data_time: 0.0190 memory: 4403 grad_norm: 0.0000 loss: 208.5594 loss_cls: 85.0345 loss_bbox: 56.9624 loss_dfl: 66.5625 04/24 01:54:44 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:54:56 - mmengine - INFO - Epoch(train) [10][ 50/290] base_lr: 
2.0000e-03 lr: 1.8416e-03 eta: 1:25:07 time: 0.2423 data_time: 0.0514 memory: 4991 grad_norm: 0.0000 loss: 208.5929 loss_cls: 85.0034 loss_bbox: 57.0269 loss_dfl: 66.5625 04/24 01:55:06 - mmengine - INFO - Epoch(train) [10][100/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:24:53 time: 0.1849 data_time: 0.0157 memory: 4443 grad_norm: 0.0000 loss: 209.1813 loss_cls: 85.0113 loss_bbox: 57.6075 loss_dfl: 66.5625 04/24 01:55:15 - mmengine - INFO - Epoch(train) [10][150/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:24:39 time: 0.1852 data_time: 0.0119 memory: 4630 grad_norm: 0.0000 loss: 209.0244 loss_cls: 85.0950 loss_bbox: 57.3670 loss_dfl: 66.5625 04/24 01:55:27 - mmengine - INFO - Epoch(train) [10][200/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:24:55 time: 0.2496 data_time: 0.0425 memory: 4991 grad_norm: 0.0000 loss: 208.7363 loss_cls: 84.8396 loss_bbox: 57.3342 loss_dfl: 66.5625 04/24 01:55:39 - mmengine - INFO - Epoch(train) [10][250/290] base_lr: 2.0000e-03 lr: 1.8416e-03 eta: 1:25:01 time: 0.2273 data_time: 0.0430 memory: 4603 grad_norm: 0.0000 loss: 208.3694 loss_cls: 84.9797 loss_bbox: 56.8273 loss_dfl: 66.5625 04/24 01:55:48 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:55:48 - mmengine - INFO - Saving checkpoint at 10 epochs 04/24 01:55:55 - mmengine - INFO - Epoch(val) [10][ 50/1160] eta: 0:01:06 time: 0.0599 data_time: 0.0112 memory: 4656 04/24 01:55:58 - mmengine - INFO - Epoch(val) [10][ 100/1160] eta: 0:01:02 time: 0.0582 data_time: 0.0052 memory: 597 04/24 01:56:00 - mmengine - INFO - Epoch(val) [10][ 150/1160] eta: 0:00:54 time: 0.0449 data_time: 0.0015 memory: 597 04/24 01:56:02 - mmengine - INFO - Epoch(val) [10][ 200/1160] eta: 0:00:51 time: 0.0503 data_time: 0.0019 memory: 597 04/24 01:56:04 - mmengine - INFO - Epoch(val) [10][ 250/1160] eta: 0:00:45 time: 0.0367 data_time: 0.0011 memory: 597 04/24 01:56:07 - mmengine - INFO - Epoch(val) [10][ 300/1160] eta: 
0:00:43 time: 0.0503 data_time: 0.0016 memory: 597 04/24 01:56:09 - mmengine - INFO - Epoch(val) [10][ 350/1160] eta: 0:00:38 time: 0.0362 data_time: 0.0013 memory: 597 04/24 01:56:11 - mmengine - INFO - Epoch(val) [10][ 400/1160] eta: 0:00:36 time: 0.0439 data_time: 0.0015 memory: 597 04/24 01:56:13 - mmengine - INFO - Epoch(val) [10][ 450/1160] eta: 0:00:33 time: 0.0496 data_time: 0.0020 memory: 597 04/24 01:56:16 - mmengine - INFO - Epoch(val) [10][ 500/1160] eta: 0:00:31 time: 0.0478 data_time: 0.0027 memory: 597 04/24 01:56:17 - mmengine - INFO - Epoch(val) [10][ 550/1160] eta: 0:00:28 time: 0.0350 data_time: 0.0013 memory: 597 04/24 01:56:20 - mmengine - INFO - Epoch(val) [10][ 600/1160] eta: 0:00:25 time: 0.0424 data_time: 0.0014 memory: 597 04/24 01:56:22 - mmengine - INFO - Epoch(val) [10][ 650/1160] eta: 0:00:23 time: 0.0520 data_time: 0.0028 memory: 597 04/24 01:56:25 - mmengine - INFO - Epoch(val) [10][ 700/1160] eta: 0:00:21 time: 0.0473 data_time: 0.0032 memory: 597 04/24 01:56:26 - mmengine - INFO - Epoch(val) [10][ 750/1160] eta: 0:00:18 time: 0.0333 data_time: 0.0008 memory: 597 04/24 01:56:29 - mmengine - INFO - Epoch(val) [10][ 800/1160] eta: 0:00:16 time: 0.0478 data_time: 0.0029 memory: 597 04/24 01:56:31 - mmengine - INFO - Epoch(val) [10][ 850/1160] eta: 0:00:14 time: 0.0504 data_time: 0.0018 memory: 597 04/24 01:56:33 - mmengine - INFO - Epoch(val) [10][ 900/1160] eta: 0:00:11 time: 0.0408 data_time: 0.0008 memory: 597 04/24 01:56:35 - mmengine - INFO - Epoch(val) [10][ 950/1160] eta: 0:00:09 time: 0.0361 data_time: 0.0014 memory: 597 04/24 01:56:37 - mmengine - INFO - Epoch(val) [10][1000/1160] eta: 0:00:07 time: 0.0407 data_time: 0.0009 memory: 597 04/24 01:56:40 - mmengine - INFO - Epoch(val) [10][1050/1160] eta: 0:00:05 time: 0.0546 data_time: 0.0041 memory: 597 04/24 01:56:42 - mmengine - INFO - Epoch(val) [10][1100/1160] eta: 0:00:02 time: 0.0513 data_time: 0.0018 memory: 597 04/24 01:56:44 - mmengine - INFO - Epoch(val) 
[10][1150/1160] eta: 0:00:00 time: 0.0306 data_time: 0.0010 memory: 597 04/24 01:56:44 - mmengine - INFO - Evaluating bbox... Loading and preparing results... 04/24 01:56:44 - mmengine - ERROR - /opt/conda/lib/python3.10/site-packages/mmdet/evaluation/metrics/coco_metric.py - compute_metrics - 469 - The testing results of the whole dataset is empty. 04/24 01:56:44 - mmengine - INFO - Epoch(val) [10][1160/1160] data_time: 0.0023 time: 0.0450 04/24 01:56:56 - mmengine - INFO - Epoch(train) [11][ 50/290] base_lr: 2.0000e-03 lr: 1.8218e-03 eta: 1:25:15 time: 0.2441 data_time: 0.0684 memory: 4510 grad_norm: 0.0000 loss: 208.4549 loss_cls: 85.0023 loss_bbox: 56.8901 loss_dfl: 66.5625 04/24 01:57:05 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:57:05 - mmengine - INFO - Epoch(train) [11][100/290] base_lr: 2.0000e-03 lr: 1.8218e-03 eta: 1:24:55 time: 0.1744 data_time: 0.0112 memory: 4843 grad_norm: 0.0000 loss: 208.3745 loss_cls: 85.1314 loss_bbox: 56.6806 loss_dfl: 66.5625 04/24 01:57:14 - mmengine - INFO - Epoch(train) [11][150/290] base_lr: 2.0000e-03 lr: 1.8218e-03 eta: 1:24:38 time: 0.1796 data_time: 0.0189 memory: 4616 grad_norm: 0.0000 loss: 209.1933 loss_cls: 85.0257 loss_bbox: 57.6051 loss_dfl: 66.5625 04/24 01:57:23 - mmengine - INFO - Epoch(train) [11][200/290] base_lr: 2.0000e-03 lr: 1.8218e-03 eta: 1:24:20 time: 0.1740 data_time: 0.0064 memory: 4683 grad_norm: 0.0000 loss: 209.2356 loss_cls: 85.1660 loss_bbox: 57.5071 loss_dfl: 66.5625 04/24 01:57:33 - mmengine - INFO - Epoch(train) [11][250/290] base_lr: 2.0000e-03 lr: 1.8218e-03 eta: 1:24:12 time: 0.2011 data_time: 0.0146 memory: 4483 grad_norm: 0.0000 loss: 208.3271 loss_cls: 84.9534 loss_bbox: 56.8112 loss_dfl: 66.5625 04/24 01:57:40 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:57:52 - mmengine - INFO - Epoch(train) [12][ 50/290] base_lr: 
2.0000e-03 lr: 1.8020e-03 eta: 1:24:03 time: 0.2287 data_time: 0.0580 memory: 4456 grad_norm: 0.0000 loss: 209.1542 loss_cls: 85.0694 loss_bbox: 57.5223 loss_dfl: 66.5625 04/24 01:58:02 - mmengine - INFO - Epoch(train) [12][100/290] base_lr: 2.0000e-03 lr: 1.8020e-03 eta: 1:23:56 time: 0.2051 data_time: 0.0225 memory: 4430 grad_norm: 0.0000 loss: 208.7917 loss_cls: 84.9126 loss_bbox: 57.3165 loss_dfl: 66.5625 04/24 01:58:11 - mmengine - INFO - Epoch(train) [12][150/290] base_lr: 2.0000e-03 lr: 1.8020e-03 eta: 1:23:38 time: 0.1733 data_time: 0.0099 memory: 4683 grad_norm: 0.0000 loss: 208.8347 loss_cls: 84.9103 loss_bbox: 57.3619 loss_dfl: 66.5625 04/24 01:58:20 - mmengine - INFO - Epoch(train) [12][200/290] base_lr: 2.0000e-03 lr: 1.8020e-03 eta: 1:23:22 time: 0.1793 data_time: 0.0138 memory: 4656 grad_norm: 0.0000 loss: 209.0807 loss_cls: 85.1878 loss_bbox: 57.3304 loss_dfl: 66.5625 04/24 01:58:29 - mmengine - INFO - Epoch(train) [12][250/290] base_lr: 2.0000e-03 lr: 1.8020e-03 eta: 1:23:12 time: 0.1941 data_time: 0.0114 memory: 4963 grad_norm: 0.0000 loss: 208.7168 loss_cls: 85.1470 loss_bbox: 57.0073 loss_dfl: 66.5625 04/24 01:58:36 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:58:48 - mmengine - INFO - Epoch(train) [13][ 50/290] base_lr: 2.0000e-03 lr: 1.7822e-03 eta: 1:22:59 time: 0.2293 data_time: 0.0632 memory: 4816 grad_norm: 0.0000 loss: 208.5295 loss_cls: 84.9813 loss_bbox: 56.9857 loss_dfl: 66.5625 04/24 01:58:57 - mmengine - INFO - Epoch(train) [13][100/290] base_lr: 2.0000e-03 lr: 1.7822e-03 eta: 1:22:45 time: 0.1838 data_time: 0.0203 memory: 4656 grad_norm: 0.0000 loss: 209.1732 loss_cls: 85.0846 loss_bbox: 57.5261 loss_dfl: 66.5625 04/24 01:59:06 - mmengine - INFO - Epoch(train) [13][150/290] base_lr: 2.0000e-03 lr: 1.7822e-03 eta: 1:22:29 time: 0.1767 data_time: 0.0126 memory: 4883 grad_norm: 0.0000 loss: 208.6712 loss_cls: 84.8874 loss_bbox: 57.2213 loss_dfl: 66.5625 
04/24 01:59:15 - mmengine - INFO - Epoch(train) [13][200/290] base_lr: 2.0000e-03 lr: 1.7822e-03 eta: 1:22:13 time: 0.1783 data_time: 0.0155 memory: 4671 grad_norm: 0.0000 loss: 208.8264 loss_cls: 85.0155 loss_bbox: 57.2485 loss_dfl: 66.5625 04/24 01:59:24 - mmengine - INFO - Epoch(train) [13][250/290] base_lr: 2.0000e-03 lr: 1.7822e-03 eta: 1:22:04 time: 0.1963 data_time: 0.0159 memory: 4523 grad_norm: 0.0000 loss: 208.7948 loss_cls: 84.9801 loss_bbox: 57.2522 loss_dfl: 66.5625 04/24 01:59:32 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 01:59:43 - mmengine - INFO - Epoch(train) [14][ 50/290] base_lr: 2.0000e-03 lr: 1.7624e-03 eta: 1:21:56 time: 0.2285 data_time: 0.0670 memory: 4630 grad_norm: 0.0000 loss: 209.0704 loss_cls: 85.0209 loss_bbox: 57.4870 loss_dfl: 66.5625 04/24 01:59:53 - mmengine - INFO - Epoch(train) [14][100/290] base_lr: 2.0000e-03 lr: 1.7624e-03 eta: 1:21:42 time: 0.1840 data_time: 0.0313 memory: 4590 grad_norm: 0.0000 loss: 208.8153 loss_cls: 85.0152 loss_bbox: 57.2376 loss_dfl: 66.5625 04/24 02:00:02 - mmengine - INFO - Epoch(train) [14][150/290] base_lr: 2.0000e-03 lr: 1.7624e-03 eta: 1:21:32 time: 0.1932 data_time: 0.0170 memory: 4723 grad_norm: 0.0000 loss: 208.6665 loss_cls: 84.9501 loss_bbox: 57.1539 loss_dfl: 66.5625 04/24 02:00:11 - mmengine - INFO - Epoch(train) [14][200/290] base_lr: 2.0000e-03 lr: 1.7624e-03 eta: 1:21:15 time: 0.1714 data_time: 0.0092 memory: 4550 grad_norm: 0.0000 loss: 208.7670 loss_cls: 84.8790 loss_bbox: 57.3255 loss_dfl: 66.5625 04/24 02:00:16 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:00:20 - mmengine - INFO - Epoch(train) [14][250/290] base_lr: 2.0000e-03 lr: 1.7624e-03 eta: 1:20:59 time: 0.1763 data_time: 0.0140 memory: 4710 grad_norm: 0.0000 loss: 208.7337 loss_cls: 85.1405 loss_bbox: 57.0307 loss_dfl: 66.5625 04/24 02:00:27 - mmengine - INFO - Exp 
name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:00:39 - mmengine - INFO - Epoch(train) [15][ 50/290] base_lr: 2.0000e-03 lr: 1.7426e-03 eta: 1:20:55 time: 0.2442 data_time: 0.0599 memory: 4750 grad_norm: 0.0000 loss: 208.3945 loss_cls: 84.8151 loss_bbox: 57.0169 loss_dfl: 66.5625 04/24 02:00:48 - mmengine - INFO - Epoch(train) [15][100/290] base_lr: 2.0000e-03 lr: 1.7426e-03 eta: 1:20:39 time: 0.1741 data_time: 0.0121 memory: 4376 grad_norm: 0.0000 loss: 208.8766 loss_cls: 85.1232 loss_bbox: 57.1910 loss_dfl: 66.5625 04/24 02:00:58 - mmengine - INFO - Epoch(train) [15][150/290] base_lr: 2.0000e-03 lr: 1.7426e-03 eta: 1:20:32 time: 0.2040 data_time: 0.0181 memory: 4590 grad_norm: 0.0000 loss: 209.0517 loss_cls: 84.9782 loss_bbox: 57.5111 loss_dfl: 66.5625 04/24 02:01:09 - mmengine - INFO - Epoch(train) [15][200/290] base_lr: 2.0000e-03 lr: 1.7426e-03 eta: 1:20:25 time: 0.2046 data_time: 0.0230 memory: 4564 grad_norm: 0.0000 loss: 209.3056 loss_cls: 85.0075 loss_bbox: 57.7356 loss_dfl: 66.5625 04/24 02:01:21 - mmengine - INFO - Epoch(train) [15][250/290] base_lr: 2.0000e-03 lr: 1.7426e-03 eta: 1:20:31 time: 0.2512 data_time: 0.0564 memory: 4416 grad_norm: 0.0000 loss: 209.2686 loss_cls: 85.1197 loss_bbox: 57.5864 loss_dfl: 66.5625 04/24 02:01:31 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:01:31 - mmengine - INFO - Saving checkpoint at 15 epochs 04/24 02:01:38 - mmengine - INFO - Epoch(val) [15][ 50/1160] eta: 0:01:05 time: 0.0589 data_time: 0.0029 memory: 4496 04/24 02:01:40 - mmengine - INFO - Epoch(val) [15][ 100/1160] eta: 0:00:54 time: 0.0442 data_time: 0.0024 memory: 597 04/24 02:01:42 - mmengine - INFO - Epoch(val) [15][ 150/1160] eta: 0:00:46 time: 0.0357 data_time: 0.0012 memory: 597 04/24 02:01:44 - mmengine - INFO - Epoch(val) [15][ 200/1160] eta: 0:00:44 time: 0.0471 data_time: 0.0045 memory: 597 04/24 02:01:47 - mmengine 
- INFO - Epoch(val) [15][ 250/1160] eta: 0:00:43 time: 0.0518 data_time: 0.0045 memory: 597 04/24 02:01:49 - mmengine - INFO - Epoch(val) [15][ 300/1160] eta: 0:00:40 time: 0.0456 data_time: 0.0017 memory: 597 04/24 02:01:51 - mmengine - INFO - Epoch(val) [15][ 350/1160] eta: 0:00:36 time: 0.0335 data_time: 0.0009 memory: 597 04/24 02:01:53 - mmengine - INFO - Epoch(val) [15][ 400/1160] eta: 0:00:34 time: 0.0433 data_time: 0.0007 memory: 597 04/24 02:01:56 - mmengine - INFO - Epoch(val) [15][ 450/1160] eta: 0:00:32 time: 0.0528 data_time: 0.0018 memory: 597 04/24 02:01:58 - mmengine - INFO - Epoch(val) [15][ 500/1160] eta: 0:00:29 time: 0.0403 data_time: 0.0016 memory: 597 04/24 02:02:00 - mmengine - INFO - Epoch(val) [15][ 550/1160] eta: 0:00:27 time: 0.0457 data_time: 0.0019 memory: 597 04/24 02:02:02 - mmengine - INFO - Epoch(val) [15][ 600/1160] eta: 0:00:25 time: 0.0471 data_time: 0.0036 memory: 597 04/24 02:02:05 - mmengine - INFO - Epoch(val) [15][ 650/1160] eta: 0:00:23 time: 0.0473 data_time: 0.0035 memory: 597 04/24 02:02:07 - mmengine - INFO - Epoch(val) [15][ 700/1160] eta: 0:00:20 time: 0.0431 data_time: 0.0027 memory: 597 04/24 02:02:09 - mmengine - INFO - Epoch(val) [15][ 750/1160] eta: 0:00:18 time: 0.0405 data_time: 0.0014 memory: 597 04/24 02:02:11 - mmengine - INFO - Epoch(val) [15][ 800/1160] eta: 0:00:16 time: 0.0479 data_time: 0.0021 memory: 597 04/24 02:02:13 - mmengine - INFO - Epoch(val) [15][ 850/1160] eta: 0:00:13 time: 0.0422 data_time: 0.0011 memory: 597 04/24 02:02:16 - mmengine - INFO - Epoch(val) [15][ 900/1160] eta: 0:00:11 time: 0.0440 data_time: 0.0019 memory: 597 04/24 02:02:18 - mmengine - INFO - Epoch(val) [15][ 950/1160] eta: 0:00:09 time: 0.0458 data_time: 0.0009 memory: 597 04/24 02:02:21 - mmengine - INFO - Epoch(val) [15][1000/1160] eta: 0:00:07 time: 0.0537 data_time: 0.0014 memory: 597 04/24 02:02:23 - mmengine - INFO - Epoch(val) [15][1050/1160] eta: 0:00:04 time: 0.0368 data_time: 0.0008 memory: 597 04/24 02:02:24 - 
mmengine - INFO - Epoch(val) [15][1100/1160] eta: 0:00:02 time: 0.0394 data_time: 0.0014 memory: 597 04/24 02:02:27 - mmengine - INFO - Epoch(val) [15][1150/1160] eta: 0:00:00 time: 0.0587 data_time: 0.0041 memory: 597 04/24 02:02:28 - mmengine - INFO - Evaluating bbox... Loading and preparing results... 04/24 02:02:28 - mmengine - ERROR - /opt/conda/lib/python3.10/site-packages/mmdet/evaluation/metrics/coco_metric.py - compute_metrics - 469 - The testing results of the whole dataset is empty. 04/24 02:02:28 - mmengine - INFO - Epoch(val) [15][1160/1160] data_time: 0.0021 time: 0.0455 04/24 02:02:43 - mmengine - INFO - Epoch(train) [16][ 50/290] base_lr: 2.0000e-03 lr: 1.7228e-03 eta: 1:20:54 time: 0.2946 data_time: 0.0972 memory: 4671 grad_norm: 0.0000 loss: 208.6796 loss_cls: 84.9651 loss_bbox: 57.1520 loss_dfl: 66.5625 04/24 02:02:51 - mmengine - INFO - Epoch(train) [16][100/290] base_lr: 2.0000e-03 lr: 1.7228e-03 eta: 1:20:37 time: 0.1723 data_time: 0.0100 memory: 4416 grad_norm: 0.0000 loss: 208.9665 loss_cls: 84.9839 loss_bbox: 57.4201 loss_dfl: 66.5625 04/24 02:03:00 - mmengine - INFO - Epoch(train) [16][150/290] base_lr: 2.0000e-03 lr: 1.7228e-03 eta: 1:20:22 time: 0.1783 data_time: 0.0146 memory: 4443 grad_norm: 0.0000 loss: 209.0466 loss_cls: 84.9119 loss_bbox: 57.5722 loss_dfl: 66.5625 04/24 02:03:09 - mmengine - INFO - Epoch(train) [16][200/290] base_lr: 2.0000e-03 lr: 1.7228e-03 eta: 1:20:08 time: 0.1792 data_time: 0.0081 memory: 4843 grad_norm: 0.0000 loss: 208.4161 loss_cls: 85.0224 loss_bbox: 56.8312 loss_dfl: 66.5625 04/24 02:03:19 - mmengine - INFO - Epoch(train) [16][250/290] base_lr: 2.0000e-03 lr: 1.7228e-03 eta: 1:19:57 time: 0.1959 data_time: 0.0164 memory: 4696 grad_norm: 0.0000 loss: 208.5272 loss_cls: 84.8246 loss_bbox: 57.1402 loss_dfl: 66.5625 04/24 02:03:26 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:03:38 - mmengine - INFO - Epoch(train) [17][ 50/290] 
base_lr: 2.0000e-03 lr: 1.7030e-03 eta: 1:19:44 time: 0.2323 data_time: 0.0627 memory: 4483 grad_norm: nan loss: 209.1800 loss_cls: 85.1599 loss_bbox: 57.4576 loss_dfl: 66.5625 04/24 02:03:47 - mmengine - INFO - Epoch(train) [17][100/290] base_lr: 2.0000e-03 lr: 1.7030e-03 eta: 1:19:30 time: 0.1785 data_time: 0.0159 memory: 4310 grad_norm: 0.0000 loss: 208.8468 loss_cls: 85.0197 loss_bbox: 57.2646 loss_dfl: 66.5625 04/24 02:03:56 - mmengine - INFO - Epoch(train) [17][150/290] base_lr: 2.0000e-03 lr: 1.7030e-03 eta: 1:19:20 time: 0.1976 data_time: 0.0191 memory: 4564 grad_norm: 0.0000 loss: 209.0484 loss_cls: 85.0211 loss_bbox: 57.4648 loss_dfl: 66.5625 04/24 02:04:05 - mmengine - INFO - Epoch(train) [17][200/290] base_lr: 2.0000e-03 lr: 1.7030e-03 eta: 1:19:05 time: 0.1754 data_time: 0.0068 memory: 4483 grad_norm: 0.0000 loss: 208.7286 loss_cls: 85.0010 loss_bbox: 57.1650 loss_dfl: 66.5625 04/24 02:04:14 - mmengine - INFO - Epoch(train) [17][250/290] base_lr: 2.0000e-03 lr: 1.7030e-03 eta: 1:18:52 time: 0.1826 data_time: 0.0121 memory: 4564 grad_norm: 0.0000 loss: 209.3213 loss_cls: 85.0166 loss_bbox: 57.7422 loss_dfl: 66.5625 04/24 02:04:21 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:04:33 - mmengine - INFO - Epoch(train) [18][ 50/290] base_lr: 2.0000e-03 lr: 1.6832e-03 eta: 1:18:36 time: 0.2400 data_time: 0.0650 memory: 4656 grad_norm: 0.0000 loss: 209.3484 loss_cls: 85.1667 loss_bbox: 57.6192 loss_dfl: 66.5625 04/24 02:04:36 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:04:41 - mmengine - INFO - Epoch(train) [18][100/290] base_lr: 2.0000e-03 lr: 1.6832e-03 eta: 1:18:21 time: 0.1733 data_time: 0.0191 memory: 4603 grad_norm: 0.0000 loss: 208.7014 loss_cls: 84.9687 loss_bbox: 57.1701 loss_dfl: 66.5625 04/24 02:04:50 - mmengine - INFO - Epoch(train) [18][150/290] base_lr: 2.0000e-03 lr: 1.6832e-03 eta: 
1:18:07 time: 0.1802 data_time: 0.0135 memory: 4616 grad_norm: 0.0000 loss: 208.5786 loss_cls: 84.9700 loss_bbox: 57.0462 loss_dfl: 66.5625 04/24 02:05:00 - mmengine - INFO - Epoch(train) [18][200/290] base_lr: 2.0000e-03 lr: 1.6832e-03 eta: 1:17:57 time: 0.1919 data_time: 0.0219 memory: 4656 grad_norm: 0.0000 loss: 208.4855 loss_cls: 85.0584 loss_bbox: 56.8646 loss_dfl: 66.5625 04/24 02:05:09 - mmengine - INFO - Epoch(train) [18][250/290] base_lr: 2.0000e-03 lr: 1.6832e-03 eta: 1:17:43 time: 0.1810 data_time: 0.0085 memory: 4603 grad_norm: 0.0000 loss: 208.9385 loss_cls: 84.8069 loss_bbox: 57.5691 loss_dfl: 66.5625 04/24 02:05:17 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:05:29 - mmengine - INFO - Epoch(train) [19][ 50/290] base_lr: 2.0000e-03 lr: 1.6634e-03 eta: 1:17:34 time: 0.2383 data_time: 0.0762 memory: 4483 grad_norm: 0.0000 loss: 208.8464 loss_cls: 84.9651 loss_bbox: 57.3188 loss_dfl: 66.5625 04/24 02:05:38 - mmengine - INFO - Epoch(train) [19][100/290] base_lr: 2.0000e-03 lr: 1.6634e-03 eta: 1:17:22 time: 0.1839 data_time: 0.0159 memory: 4816 grad_norm: 0.0000 loss: 208.7131 loss_cls: 84.9737 loss_bbox: 57.1768 loss_dfl: 66.5625 04/24 02:05:47 - mmengine - INFO - Epoch(train) [19][150/290] base_lr: 2.0000e-03 lr: 1.6634e-03 eta: 1:17:07 time: 0.1751 data_time: 0.0111 memory: 4603 grad_norm: 0.0000 loss: 209.0014 loss_cls: 84.9643 loss_bbox: 57.4746 loss_dfl: 66.5625 04/24 02:05:55 - mmengine - INFO - Epoch(train) [19][200/290] base_lr: 2.0000e-03 lr: 1.6634e-03 eta: 1:16:53 time: 0.1761 data_time: 0.0116 memory: 4470 grad_norm: 0.0000 loss: 208.7032 loss_cls: 84.9951 loss_bbox: 57.1456 loss_dfl: 66.5625 04/24 02:06:05 - mmengine - INFO - Epoch(train) [19][250/290] base_lr: 2.0000e-03 lr: 1.6634e-03 eta: 1:16:44 time: 0.1964 data_time: 0.0228 memory: 4656 grad_norm: 0.0000 loss: 208.4092 loss_cls: 85.0487 loss_bbox: 56.7980 loss_dfl: 66.5625 04/24 02:06:13 - mmengine - INFO 
- Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:06:24 - mmengine - INFO - Epoch(train) [20][ 50/290] base_lr: 2.0000e-03 lr: 1.6436e-03 eta: 1:16:32 time: 0.2279 data_time: 0.0566 memory: 4830 grad_norm: 0.0000 loss: 208.7113 loss_cls: 84.9710 loss_bbox: 57.1777 loss_dfl: 66.5625 04/24 02:06:33 - mmengine - INFO - Epoch(train) [20][100/290] base_lr: 2.0000e-03 lr: 1.6436e-03 eta: 1:16:18 time: 0.1735 data_time: 0.0090 memory: 4590 grad_norm: 0.0000 loss: 208.7369 loss_cls: 85.0895 loss_bbox: 57.0849 loss_dfl: 66.5625 04/24 02:06:43 - mmengine - INFO - Epoch(train) [20][150/290] base_lr: 2.0000e-03 lr: 1.6436e-03 eta: 1:16:09 time: 0.2039 data_time: 0.0207 memory: 5030 grad_norm: 0.0000 loss: 208.9192 loss_cls: 85.0290 loss_bbox: 57.3277 loss_dfl: 66.5625 04/24 02:06:52 - mmengine - INFO - Epoch(train) [20][200/290] base_lr: 2.0000e-03 lr: 1.6436e-03 eta: 1:15:55 time: 0.1741 data_time: 0.0130 memory: 4603 grad_norm: 0.0000 loss: 208.9762 loss_cls: 85.0598 loss_bbox: 57.3539 loss_dfl: 66.5625 04/24 02:07:00 - mmengine - INFO - Epoch(train) [20][250/290] base_lr: 2.0000e-03 lr: 1.6436e-03 eta: 1:15:38 time: 0.1600 data_time: 0.0052 memory: 4536 grad_norm: 0.0000 loss: 208.7317 loss_cls: 84.9639 loss_bbox: 57.2053 loss_dfl: 66.5625 04/24 02:07:07 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:07:07 - mmengine - INFO - Saving checkpoint at 20 epochs 04/24 02:07:11 - mmengine - INFO - Epoch(val) [20][ 50/1160] eta: 0:00:32 time: 0.0297 data_time: 0.0012 memory: 4496 04/24 02:07:13 - mmengine - INFO - Epoch(val) [20][ 100/1160] eta: 0:00:31 time: 0.0299 data_time: 0.0010 memory: 597 04/24 02:07:15 - mmengine - INFO - Epoch(val) [20][ 150/1160] eta: 0:00:32 time: 0.0365 data_time: 0.0014 memory: 597 04/24 02:07:17 - mmengine - INFO - Epoch(val) [20][ 200/1160] eta: 0:00:31 time: 0.0338 data_time: 0.0009 memory: 597 04/24 02:07:18 - 
mmengine - INFO - Epoch(val) [20][ 250/1160] eta: 0:00:28 time: 0.0254 data_time: 0.0005 memory: 597 04/24 02:07:19 - mmengine - INFO - Epoch(val) [20][ 300/1160] eta: 0:00:25 time: 0.0213 data_time: 0.0004 memory: 597 04/24 02:07:20 - mmengine - INFO - Epoch(val) [20][ 350/1160] eta: 0:00:23 time: 0.0272 data_time: 0.0020 memory: 597 04/24 02:07:22 - mmengine - INFO - Epoch(val) [20][ 400/1160] eta: 0:00:22 time: 0.0338 data_time: 0.0010 memory: 597 04/24 02:07:23 - mmengine - INFO - Epoch(val) [20][ 450/1160] eta: 0:00:21 time: 0.0286 data_time: 0.0008 memory: 597 04/24 02:07:25 - mmengine - INFO - Epoch(val) [20][ 500/1160] eta: 0:00:19 time: 0.0349 data_time: 0.0010 memory: 597 04/24 02:07:26 - mmengine - INFO - Epoch(val) [20][ 550/1160] eta: 0:00:18 time: 0.0253 data_time: 0.0006 memory: 597 04/24 02:07:28 - mmengine - INFO - Epoch(val) [20][ 600/1160] eta: 0:00:16 time: 0.0319 data_time: 0.0005 memory: 597 04/24 02:07:29 - mmengine - INFO - Epoch(val) [20][ 650/1160] eta: 0:00:15 time: 0.0300 data_time: 0.0012 memory: 597 04/24 02:07:31 - mmengine - INFO - Epoch(val) [20][ 700/1160] eta: 0:00:13 time: 0.0247 data_time: 0.0009 memory: 597 04/24 02:07:32 - mmengine - INFO - Epoch(val) [20][ 750/1160] eta: 0:00:12 time: 0.0296 data_time: 0.0010 memory: 597 04/24 02:07:34 - mmengine - INFO - Epoch(val) [20][ 800/1160] eta: 0:00:10 time: 0.0292 data_time: 0.0008 memory: 597 04/24 02:07:35 - mmengine - INFO - Epoch(val) [20][ 850/1160] eta: 0:00:09 time: 0.0273 data_time: 0.0014 memory: 597 04/24 02:07:37 - mmengine - INFO - Epoch(val) [20][ 900/1160] eta: 0:00:07 time: 0.0338 data_time: 0.0008 memory: 597 04/24 02:07:38 - mmengine - INFO - Epoch(val) [20][ 950/1160] eta: 0:00:06 time: 0.0229 data_time: 0.0014 memory: 597 04/24 02:07:40 - mmengine - INFO - Epoch(val) [20][1000/1160] eta: 0:00:04 time: 0.0340 data_time: 0.0011 memory: 597 04/24 02:07:41 - mmengine - INFO - Epoch(val) [20][1050/1160] eta: 0:00:03 time: 0.0282 data_time: 0.0005 memory: 597 04/24 
02:07:43 - mmengine - INFO - Epoch(val) [20][1100/1160] eta: 0:00:01 time: 0.0295 data_time: 0.0011 memory: 597 04/24 02:07:44 - mmengine - INFO - Epoch(val) [20][1150/1160] eta: 0:00:00 time: 0.0355 data_time: 0.0016 memory: 597 04/24 02:07:45 - mmengine - INFO - Evaluating bbox... Loading and preparing results... 04/24 02:07:45 - mmengine - ERROR - /opt/conda/lib/python3.10/site-packages/mmdet/evaluation/metrics/coco_metric.py - compute_metrics - 469 - The testing results of the whole dataset is empty. 04/24 02:07:45 - mmengine - INFO - Epoch(val) [20][1160/1160] data_time: 0.0010 time: 0.0297 04/24 02:07:55 - mmengine - INFO - Epoch(train) [21][ 50/290] base_lr: 2.0000e-03 lr: 1.6238e-03 eta: 1:15:20 time: 0.2167 data_time: 0.0588 memory: 4750 grad_norm: 0.0000 loss: 208.4572 loss_cls: 85.0767 loss_bbox: 56.8180 loss_dfl: 66.5625 04/24 02:08:05 - mmengine - INFO - Epoch(train) [21][100/290] base_lr: 2.0000e-03 lr: 1.6238e-03 eta: 1:15:08 time: 0.1841 data_time: 0.0152 memory: 4577 grad_norm: 0.0000 loss: 209.0636 loss_cls: 85.0925 loss_bbox: 57.4086 loss_dfl: 66.5625 04/24 02:08:13 - mmengine - INFO - Epoch(train) [21][150/290] base_lr: 2.0000e-03 lr: 1.6238e-03 eta: 1:14:53 time: 0.1676 data_time: 0.0060 memory: 4443 grad_norm: 0.0000 loss: 209.1516 loss_cls: 85.0927 loss_bbox: 57.4963 loss_dfl: 66.5625 04/24 02:08:23 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:08:23 - mmengine - INFO - Epoch(train) [21][200/290] base_lr: 2.0000e-03 lr: 1.6238e-03 eta: 1:14:42 time: 0.1913 data_time: 0.0171 memory: 4523 grad_norm: 0.0000 loss: 208.8522 loss_cls: 85.0758 loss_bbox: 57.2139 loss_dfl: 66.5625 04/24 02:08:31 - mmengine - INFO - Epoch(train) [21][250/290] base_lr: 2.0000e-03 lr: 1.6238e-03 eta: 1:14:28 time: 0.1692 data_time: 0.0117 memory: 4803 grad_norm: 0.0000 loss: 209.1729 loss_cls: 85.0918 loss_bbox: 57.5187 loss_dfl: 66.5625 04/24 02:08:37 - mmengine - INFO - Exp name: 
yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:08:49 - mmengine - INFO - Epoch(train) [22][ 50/290] base_lr: 2.0000e-03 lr: 1.6040e-03 eta: 1:14:12 time: 0.2349 data_time: 0.0747 memory: 4616 grad_norm: 0.0000 loss: 209.1220 loss_cls: 85.1554 loss_bbox: 57.4042 loss_dfl: 66.5625 04/24 02:08:58 - mmengine - INFO - Epoch(train) [22][100/290] base_lr: 2.0000e-03 lr: 1.6040e-03 eta: 1:13:58 time: 0.1720 data_time: 0.0159 memory: 4603 grad_norm: 0.0000 loss: 208.8812 loss_cls: 84.9711 loss_bbox: 57.3476 loss_dfl: 66.5625 04/24 02:09:07 - mmengine - INFO - Epoch(train) [22][150/290] base_lr: 2.0000e-03 lr: 1.6040e-03 eta: 1:13:46 time: 0.1818 data_time: 0.0124 memory: 4656 grad_norm: 0.0000 loss: 209.3741 loss_cls: 85.0804 loss_bbox: 57.7312 loss_dfl: 66.5625 04/24 02:09:16 - mmengine - INFO - Epoch(train) [22][200/290] base_lr: 2.0000e-03 lr: 1.6040e-03 eta: 1:13:33 time: 0.1776 data_time: 0.0132 memory: 4656 grad_norm: 0.0000 loss: 208.6347 loss_cls: 85.1128 loss_bbox: 56.9594 loss_dfl: 66.5625 04/24 02:09:24 - mmengine - INFO - Epoch(train) [22][250/290] base_lr: 2.0000e-03 lr: 1.6040e-03 eta: 1:13:19 time: 0.1691 data_time: 0.0042 memory: 4577 grad_norm: 0.0000 loss: 208.9543 loss_cls: 85.1298 loss_bbox: 57.2620 loss_dfl: 66.5625 04/24 02:09:30 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:09:42 - mmengine - INFO - Epoch(train) [23][ 50/290] base_lr: 2.0000e-03 lr: 1.5842e-03 eta: 1:13:03 time: 0.2389 data_time: 0.0685 memory: 4843 grad_norm: 0.0000 loss: 208.9363 loss_cls: 85.0100 loss_bbox: 57.3638 loss_dfl: 66.5625 04/24 02:09:51 - mmengine - INFO - Epoch(train) [23][100/290] base_lr: 2.0000e-03 lr: 1.5842e-03 eta: 1:12:50 time: 0.1721 data_time: 0.0099 memory: 4483 grad_norm: 0.0000 loss: 208.9689 loss_cls: 84.9381 loss_bbox: 57.4683 loss_dfl: 66.5625 04/24 02:10:00 - mmengine - INFO - Epoch(train) [23][150/290] base_lr: 2.0000e-03 lr: 
1.5842e-03 eta: 1:12:36 time: 0.1684 data_time: 0.0103 memory: 4883 grad_norm: 0.0000 loss: 209.0491 loss_cls: 85.0182 loss_bbox: 57.4683 loss_dfl: 66.5625 04/24 02:10:08 - mmengine - INFO - Epoch(train) [23][200/290] base_lr: 2.0000e-03 lr: 1.5842e-03 eta: 1:12:22 time: 0.1671 data_time: 0.0064 memory: 4564 grad_norm: 0.0000 loss: 209.1266 loss_cls: 85.1896 loss_bbox: 57.3745 loss_dfl: 66.5625 04/24 02:10:17 - mmengine - INFO - Epoch(train) [23][250/290] base_lr: 2.0000e-03 lr: 1.5842e-03 eta: 1:12:10 time: 0.1837 data_time: 0.0105 memory: 4910 grad_norm: 0.0000 loss: 208.2432 loss_cls: 84.8034 loss_bbox: 56.8773 loss_dfl: 66.5625 04/24 02:10:24 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:10:35 - mmengine - INFO - Epoch(train) [24][ 50/290] base_lr: 2.0000e-03 lr: 1.5644e-03 eta: 1:11:55 time: 0.2201 data_time: 0.1276 memory: 4577 grad_norm: 0.0000 loss: 209.5880 loss_cls: 85.2076 loss_bbox: 57.8179 loss_dfl: 66.5625 04/24 02:10:45 - mmengine - INFO - Epoch(train) [24][100/290] base_lr: 2.0000e-03 lr: 1.5644e-03 eta: 1:11:46 time: 0.1960 data_time: 0.0311 memory: 4696 grad_norm: 0.0000 loss: 208.6001 loss_cls: 84.8341 loss_bbox: 57.2035 loss_dfl: 66.5625 04/24 02:10:54 - mmengine - INFO - Epoch(train) [24][150/290] base_lr: 2.0000e-03 lr: 1.5644e-03 eta: 1:11:32 time: 0.1715 data_time: 0.0128 memory: 4976 grad_norm: 0.0000 loss: 208.6136 loss_cls: 85.1773 loss_bbox: 56.8737 loss_dfl: 66.5625 04/24 02:11:02 - mmengine - INFO - Epoch(train) [24][200/290] base_lr: 2.0000e-03 lr: 1.5644e-03 eta: 1:11:18 time: 0.1651 data_time: 0.0151 memory: 4936 grad_norm: 0.0000 loss: 208.7838 loss_cls: 85.0152 loss_bbox: 57.2062 loss_dfl: 66.5625 04/24 02:11:11 - mmengine - INFO - Epoch(train) [24][250/290] base_lr: 2.0000e-03 lr: 1.5644e-03 eta: 1:11:07 time: 0.1854 data_time: 0.0121 memory: 4443 grad_norm: 0.0000 loss: 208.3897 loss_cls: 84.9978 loss_bbox: 56.8295 loss_dfl: 66.5625 04/24 02:11:18 - 
mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:11:28 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:11:30 - mmengine - INFO - Epoch(train) [25][ 50/290] base_lr: 2.0000e-03 lr: 1.5446e-03 eta: 1:10:52 time: 0.2248 data_time: 0.0592 memory: 4671 grad_norm: 0.0000 loss: 208.8650 loss_cls: 85.1059 loss_bbox: 57.1966 loss_dfl: 66.5625 04/24 02:11:38 - mmengine - INFO - Epoch(train) [25][100/290] base_lr: 2.0000e-03 lr: 1.5446e-03 eta: 1:10:40 time: 0.1743 data_time: 0.0080 memory: 4430 grad_norm: 0.0000 loss: 208.8574 loss_cls: 84.8932 loss_bbox: 57.4017 loss_dfl: 66.5625 04/24 02:11:47 - mmengine - INFO - Epoch(train) [25][150/290] base_lr: 2.0000e-03 lr: 1.5446e-03 eta: 1:10:27 time: 0.1710 data_time: 0.0085 memory: 4456 grad_norm: 0.0000 loss: 209.0664 loss_cls: 85.1000 loss_bbox: 57.4039 loss_dfl: 66.5625 04/24 02:11:56 - mmengine - INFO - Epoch(train) [25][200/290] base_lr: 2.0000e-03 lr: 1.5446e-03 eta: 1:10:16 time: 0.1838 data_time: 0.0111 memory: 4577 grad_norm: 0.0000 loss: 208.9330 loss_cls: 85.0132 loss_bbox: 57.3574 loss_dfl: 66.5625 04/24 02:12:05 - mmengine - INFO - Epoch(train) [25][250/290] base_lr: 2.0000e-03 lr: 1.5446e-03 eta: 1:10:04 time: 0.1821 data_time: 0.0143 memory: 4470 grad_norm: 0.0000 loss: 208.9774 loss_cls: 85.0665 loss_bbox: 57.3484 loss_dfl: 66.5625 04/24 02:12:12 - mmengine - INFO - Exp name: yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco_20240424_014520 04/24 02:12:12 - mmengine - INFO - Saving checkpoint at 25 epochs 04/24 02:12:16 - mmengine - INFO - Epoch(val) [25][ 50/1160] eta: 0:00:26 time: 0.0240 data_time: 0.0016 memory: 4750 04/24 02:12:18 - mmengine - INFO - Epoch(val) [25][ 100/1160] eta: 0:00:27 time: 0.0286 data_time: 0.0008 memory: 597 04/24 02:12:19 - mmengine - INFO - Epoch(val) [25][ 150/1160] eta: 0:00:28 time: 0.0306 data_time: 0.0004 
memory: 597 04/24 02:12:21 - mmengine - INFO - Epoch(val) [25][ 200/1160] eta: 0:00:25 time: 0.0239 data_time: 0.0006 memory: 597 04/24 02:12:22 - mmengine - INFO - Epoch(val) [25][ 250/1160] eta: 0:00:25 time: 0.0328 data_time: 0.0027 memory: 597 04/24 02:12:24 - mmengine - INFO - Epoch(val) [25][ 300/1160] eta: 0:00:23 time: 0.0262 data_time: 0.0004 memory: 597 04/24 02:12:24 - mmengine - INFO - Epoch(val) [25][ 350/1160] eta: 0:00:21 time: 0.0181 data_time: 0.0003 memory: 597 04/24 02:12:26 - mmengine - INFO - Epoch(val) [25][ 400/1160] eta: 0:00:19 time: 0.0231 data_time: 0.0004 memory: 597 04/24 02:12:27 - mmengine - INFO - Epoch(val) [25][ 450/1160] eta: 0:00:18 time: 0.0283 data_time: 0.0014 memory: 597 04/24 02:12:29 - mmengine - INFO - Epoch(val) [25][ 500/1160] eta: 0:00:17 time: 0.0322 data_time: 0.0010 memory: 597 04/24 02:12:30 - mmengine - INFO - Epoch(val) [25][ 550/1160] eta: 0:00:16 time: 0.0369 data_time: 0.0011 memory: 597 04/24 02:12:32 - mmengine - INFO - Epoch(val) [25][ 600/1160] eta: 0:00:15 time: 0.0263 data_time: 0.0004 memory: 597 04/24 02:12:33 - mmengine - INFO - Epoch(val) [25][ 650/1160] eta: 0:00:13 time: 0.0201 data_time: 0.0004 memory: 597 04/24 02:12:34 - mmengine - INFO - Epoch(val) [25][ 700/1160] eta: 0:00:12 time: 0.0300 data_time: 0.0005 memory: 597 04/24 02:12:36 - mmengine - INFO - Epoch(val) [25][ 750/1160] eta: 0:00:11 time: 0.0274 data_time: 0.0006 memory: 597 04/24 02:12:37 - mmengine - INFO - Epoch(val) [25][ 800/1160] eta: 0:00:09 time: 0.0278 data_time: 0.0009 memory: 597 04/24 02:12:39 - mmengine - INFO - Epoch(val) [25][ 850/1160] eta: 0:00:08 time: 0.0327 data_time: 0.0011 memory: 597 04/24 02:12:40 - mmengine - INFO - Epoch(val) [25][ 900/1160] eta: 0:00:07 time: 0.0255 data_time: 0.0005 memory: 597 04/24 02:12:41 - mmengine - INFO - Epoch(val) [25][ 950/1160] eta: 0:00:05 time: 0.0202 data_time: 0.0005 memory: 597 04/24 02:12:42 - mmengine - INFO - Epoch(val) [25][1000/1160] eta: 0:00:04 time: 0.0261 data_time: 
0.0009 memory: 597 04/24 02:12:44 - mmengine - INFO - Epoch(val) [25][1050/1160] eta: 0:00:03 time: 0.0330 data_time: 0.0015 memory: 597 04/24 02:12:45 - mmengine - INFO - Epoch(val) [25][1100/1160] eta: 0:00:01 time: 0.0258 data_time: 0.0012 memory: 597 04/24 02:12:47 - mmengine - INFO - Epoch(val) [25][1150/1160] eta: 0:00:00 time: 0.0282 data_time: 0.0006 memory: 597 04/24 02:12:47 - mmengine - INFO - Evaluating bbox... Loading and preparing results... 04/24 02:12:47 - mmengine - ERROR - /opt/conda/lib/python3.10/site-packages/mmdet/evaluation/metrics/coco_metric.py - compute_metrics - 469 - The testing results of the whole dataset is empty.
Hi @zhujiajian98, I also encountered the same validation-set error. Have you solved it?
- INFO - Epoch(train) [16][100/290] base_lr: 2.0000e-03 lr: 1.7228e-03 eta: 1:20:37 time: 0.1723 data_time: 0.0100 memory: 4416 grad_norm: 0.0000 loss: 208.9665 loss_cls: 84.9839 loss_b
Hi @lin-whale, what dataset are you using, and approximately how large is it? Also, when I train on my own dataset, only one loss decreases while the other two losses remain unchanged — do you know why?
@chenjiafu-George The original issue was caused by not specifying the pretrained model weights. Did you provide them?
- INFO - Epoch(train) [16][100/290] base_lr: 2.0000e-03 lr: 1.7228e-03 eta: 1:20:37 time: 0.1723 data_time: 0.0100 memory: 4416 grad_norm: 0.0000 loss: 208.9665 loss_cls: 84.9839 loss_b
Hi @lin-whale What datasets are you using, what is the approximate size of the data set, and why only one loss decreases after running my data set, while the other two losses remain unchanged.
My dataset is custom-built, and it works well with about 1000 images.
@chenjiafu-George The origin issue was caused because not specify the pretrained Model weight. Do you offer that?
Hi @lin-whale Sorry, I can hardly help you with this problem, I didn't use this pretrain model.
@zhujiajian98 After setting
load_from = 'pretrained_models/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_cc3mlite_train_pretrained-7a5eea3b.pth'
, I have solved the problem of the loss not decreasing during training, and the predictions are informative now. However, it is not as good as direct fine-tuning: prompt tuning reaches about 70% AP while direct fine-tuning reaches about 90% AP.
Hi @lin-whale, can I ask where you found the pre-trained weights?
@zhujiajian98 After setting
load_from = 'pretrained_models/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_cc3mlite_train_pretrained-7a5eea3b.pth'
, I have solved the problem of loss not decreasing during training, and the predicents are informative now. Just it is not as good as fine-tuning directly which is about 70% AP while the other is about 90% AP.Hi @lin-whale, can I ask where you found the pre-trained weights?
@lin-whale Thank you! Another thing is that when you load_from = 'pretrained_models/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_cc3mlite_train_pretrained-7a5eea3b.pth', didn't you receive an error like that:
The model and loaded state dict do not match exactly size mismatch for embeddings: copying a param with shape torch.Size([80, 512]) from checkpoint, the shape in current model is torch.Size([50, 512])
@lin-whale Thank you! Another thing is that when you load_from = 'pretrained_models/yolo_world_l_clip_base_dual_vlpan_2e-3adamw_32xb16_100e_o365_goldg_cc3mlite_train_pretrained-7a5eea3b.pth', didn't you receive an error like that:
The model and loaded state dict do not match exactly size mismatch for embeddings: copying a param with shape torch.Size([80, 512]) from checkpoint, the shape in current model is torch.Size([50, 512])
It seems the reason is some mismatching in config. Here is my config working right. Or you can upload your config file for more details.
# MMEngine config fragment: YOLO-World-v2-L prompt tuning, inheriting the
# YOLOv8-L mask-refine COCO schedule from mmyolo.
_base_ = (
'../../third_party/mmyolo/configs/yolov8/'
'yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py')
# Register the yolo_world package so its modules resolve in the registry.
custom_imports = dict(
imports=['yolo_world'],
allow_failed_imports=False)
# hyper-parameters
num_classes = 50  # classes used at test time
num_training_classes = 50  # classes used during training
max_epochs = 80 # Maximum training epochs
close_mosaic_epochs = 10  # disable mosaic augmentation for the last N epochs
save_epoch_intervals = 5  # checkpoint every N epochs
text_channels = 512  # text/prompt embedding width
# Neck channel/head layout; the last entry scales with the backbone's
# final stage width taken from the base config.
neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2]
neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32]
base_lr = 2e-4
weight_decay = 0.05
train_batch_size_per_gpu = 64
# Pretrained YOLO-World-v2 weights; loading these resolved the
# "loss not decreasing" problem discussed in this thread.
load_from = 'pretrained_models/yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth'
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'
persistent_workers = False  # re-create dataloader workers each epoch
@lin-whale Here is my config file:
# MMEngine config fragment: YOLO-World-L fine-tune on a single-class
# (wheat) dataset, inheriting the YOLOv8-L COCO schedule from mmyolo.
_base_ = ('../../third_party/mmyolo/configs/yolov8/'
'yolov8_l_syncbn_fast_8xb16-500e_coco.py')
# Register the yolo_world package with the mmengine registry.
custom_imports = dict(imports=['yolo_world'], allow_failed_imports=False)
# hyper-parameters
num_classes = 1  # single "wheat" class at test time
num_training_classes = 1  # and during training
max_epochs = 10 # Maximum training epochs
# NOTE(review): close_mosaic_epochs equals max_epochs, so the stage-2
# pipeline switch (max_epochs - close_mosaic_epochs) fires at epoch 0 —
# confirm this is intended.
close_mosaic_epochs = 10
save_epoch_intervals = 5  # checkpoint every N epochs
text_channels = 512  # prompt/text embedding width
neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2]
neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32]
base_lr = 2e-3
weight_decay = 0.0005
train_batch_size_per_gpu = 4
#load_from = 'pretrained_models/yolo_world_l_clip_t2i_bn_2e-3adamw_32xb16-100e_obj365v1_goldg_cc3mlite_train-ca93cd1f.pth'
# NOTE(review): this checkpoint was fine-tuned on COCO (80 classes); the
# log below shows an embeddings size mismatch against it — a pretrain
# (non-COCO-finetuned) checkpoint may be the better fit here. Confirm.
load_from = 'pretrained_models/yolo_world_v2_l_vlpan_bn_sgd_1e-3_40e_8gpus_finetune_coco_ep80-e1288152.pth'
persistent_workers = False
# model settings
# SimpleYOLOWorldDetector: prompt-tuning variant that replaces the online
# CLIP text branch with a fixed, precomputed embedding file.
model = dict(type='SimpleYOLOWorldDetector',
mm_neck=True,  # use the multi-modal (text-guided) neck
num_train_classes=num_training_classes,
num_test_classes=num_classes,
# Precomputed prompt embeddings; presumably shape (num_prompts, prompt_dim)
# — the size-mismatch log below suggests verifying this file's shape.
embedding_path='embeddings/yoloworld_wheat_emb.npy',
prompt_dim=text_channels,
num_prompts=1,  # one prompt for the single class
freeze_prompt=True,  # keep the prompt embedding fixed during training
data_preprocessor=dict(type='YOLOv5DetDataPreprocessor'),
backbone=dict(_delete_=True,  # replace (not merge with) the base backbone
type='MultiModalYOLOBackbone',
text_model=None,  # no online text encoder in prompt-tuning mode
# {{...}} is mmengine's template syntax for splicing the base config's
# image backbone; this file is a config, not importable Python.
image_model={{_base_.model.backbone}},
with_text_model=False),
neck=dict(type='YOLOWorldPAFPN',
freeze_all=False,
guide_channels=text_channels,
embed_channels=neck_embed_channels,
num_heads=neck_num_heads,
block_cfg=dict(type='MaxSigmoidCSPLayerWithTwoConv')),
bbox_head=dict(type='YOLOWorldHead',
head_module=dict(
type='YOLOWorldHeadModule',
freeze_all=False,
use_bn_head=True,
embed_dims=text_channels,
num_classes=num_training_classes)),
train_cfg=dict(assigner=dict(num_classes=num_training_classes)))
# BUG FIX: ('wheat') is just the string 'wheat' — a one-element tuple needs
# a trailing comma, otherwise the dataset iterates the class names
# per-character ('w', 'h', 'e', 'a', 't').
meta_info = dict(classes=('wheat', ))
# dataset settings
coco_train_dataset = dict(type='YOLOv5CocoDataset',
metainfo = meta_info,  # class-name override defined above
data_root='data/wheat',
ann_file='annotations/wheat_coco_train_ann.json',
data_prefix=dict(img='wheat_train/'),
# Keep images without GT boxes; min_size presumably filters tiny
# images/annotations — verify against the dataset implementation.
filter_cfg=dict(filter_empty_gt=False, min_size=32),
pipeline=_base_.train_pipeline)
train_dataloader = dict(persistent_workers=persistent_workers,
batch_size=train_batch_size_per_gpu,
collate_fn=dict(type='yolow_collate'),  # YOLO-World collate function
dataset=coco_train_dataset)
coco_val_dataset = dict(type='YOLOv5CocoDataset',
metainfo = meta_info,
data_root='data/wheat',
ann_file='annotations/wheat_coco_val_ann.json',
data_prefix=dict(img='wheat_val/'),
filter_cfg=dict(filter_empty_gt=False, min_size=32),
pipeline=_base_.test_pipeline)
val_dataloader = dict(dataset=coco_val_dataset)
test_dataloader = val_dataloader  # reuse the validation loader for testing
# training settings
default_hooks = dict(param_scheduler=dict(scheduler_type='linear',
lr_factor=0.01,  # presumably the final-lr multiplier — confirm in mmyolo
max_epochs=max_epochs),
checkpoint=dict(max_keep_ckpts=-1,  # keep every checkpoint
save_best=None,
interval=save_epoch_intervals))
custom_hooks = [
# Exponential-momentum EMA of the model weights.
dict(type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0001,
update_buffers=True,
strict_load=False,
priority=49),
# Switch to the stage-2 (no-mosaic) training pipeline.
# NOTE(review): max_epochs - close_mosaic_epochs == 0 with the values
# above, so the switch happens immediately — confirm this is intended.
dict(type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - close_mosaic_epochs,
switch_pipeline=_base_.train_pipeline_stage2)
]
train_cfg = dict(max_epochs=max_epochs,
val_interval=5,
# Use val_interval_stage2 from the base config after the switch epoch.
dynamic_intervals=[((max_epochs - close_mosaic_epochs),
_base_.val_interval_stage2)])
optim_wrapper = dict(optimizer=dict(
_delete_=True,  # replace (not merge with) the base optimizer config
type='SGD',
lr=base_lr,
momentum=0.937,
nesterov=True,
weight_decay=weight_decay,
batch_size_per_gpu=train_batch_size_per_gpu))
# evaluation settings
val_evaluator = dict(_delete_=True,
type='mmdet.CocoMetric',
proposal_nums=(100, 1, 10),
ann_file='data/wheat/annotations/wheat_coco_val_ann.json',
metric='bbox',
classwise=True)  # report per-class AP
And my output:
Loads checkpoint by local backend from path: pretrained_models/yolo_world_v2_l_vlpan_bn_sgd_1e-3_40e_8gpus_finetune_coco_ep80-e1288152.pth The model and loaded state dict do not match exactly
size mismatch for embeddings: copying a param with shape torch.Size([80, 512]) from checkpoint, the shape in current model is torch.Size([512]). The model and loaded state dict do not match exactly
size mismatch for embeddings: copying a param with shape torch.Size([80, 512]) from checkpoint, the shape in current model is torch.Size([512]).
@pelinsuacar
Maybe to adjust num_classes = 1 num_training_classes = 1
and see if output altered?
@pelinsuacar Maybe to adjust
num_classes = 1 num_training_classes = 1
and see if output altered?
but they are already set as 1
@pelinsuacar It seems that the COCO dataset contains 80 categories, and torch.Size([80, 512]) means that this checkpoint file can only be used with the COCO dataset.
Try this checkpoint? load_from = 'pretrained_models/yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth'
@pelinsuacar Maybe to adjust
num_classes = 1 num_training_classes = 1
and see if output altered?but they are already set as 1
I mean modify these two parameters.
yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth
yes exactly but I could not find a suitable checkpoint. I tried yours but it gives even more error:
Loads checkpoint by local backend from path: pretrained_models/yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth The model and loaded state dict do not match exactly
unexpected key in source state_dict: backbone.text_model.model.text_model.embeddings.token_embedding.weight, backbone.text_model.model.text_model.embeddings.position_embedding.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.0.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.0.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.0.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.0.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.0.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.0.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.0.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.0.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.0.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.0.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.0.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.1.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.1.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.1.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.1.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.1.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.1.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.1.self_attn.out_proj.weight, 
backbone.text_model.model.text_model.encoder.layers.1.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.1.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.1.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.1.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.1.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.1.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.1.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.1.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.1.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.2.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.2.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.2.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.2.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.2.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.2.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.2.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.2.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.2.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.2.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.2.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.2.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.2.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.2.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.2.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.2.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.3.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.3.self_attn.k_proj.bias, 
backbone.text_model.model.text_model.encoder.layers.3.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.3.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.3.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.3.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.3.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.3.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.3.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.3.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.3.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.3.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.3.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.3.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.3.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.3.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.4.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.4.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.4.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.4.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.4.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.4.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.4.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.4.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.4.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.4.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.4.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.4.mlp.fc1.bias, 
backbone.text_model.model.text_model.encoder.layers.4.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.4.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.4.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.4.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.5.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.5.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.5.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.5.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.5.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.5.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.5.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.5.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.5.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.5.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.5.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.5.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.5.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.5.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.5.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.5.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.6.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.6.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.6.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.6.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.6.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.6.self_attn.q_proj.bias, 
backbone.text_model.model.text_model.encoder.layers.6.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.6.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.6.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.6.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.6.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.6.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.6.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.6.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.6.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.6.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.7.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.7.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.7.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.7.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.7.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.7.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.7.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.7.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.7.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.7.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.7.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.7.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.7.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.7.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.7.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.7.layer_norm2.bias, 
backbone.text_model.model.text_model.encoder.layers.8.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.8.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.8.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.8.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.8.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.8.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.8.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.8.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.8.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.8.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.8.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.8.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.8.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.8.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.8.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.8.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.9.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.9.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.9.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.9.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.9.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.9.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.9.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.9.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.9.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.9.layer_norm1.bias, 
backbone.text_model.model.text_model.encoder.layers.9.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.9.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.9.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.9.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.9.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.9.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.10.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.10.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.10.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.10.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.10.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.10.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.10.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.10.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.10.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.10.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.10.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.10.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.10.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.10.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.10.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.10.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.11.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.11.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.11.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.11.self_attn.v_proj.bias, 
backbone.text_model.model.text_model.encoder.layers.11.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.11.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.11.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.11.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.11.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.11.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.11.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.11.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.11.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.11.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.11.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.11.layer_norm2.bias, backbone.text_model.model.text_model.final_layer_norm.weight, backbone.text_model.model.text_model.final_layer_norm.bias, backbone.text_model.model.text_projection.weight
missing keys in source state_dict: embeddings
The model and loaded state dict do not match exactly
unexpected key in source state_dict: backbone.text_model.model.text_model.embeddings.token_embedding.weight, backbone.text_model.model.text_model.embeddings.position_embedding.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.0.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.0.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.0.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.0.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.0.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.0.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.0.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.0.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.0.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.0.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.0.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.0.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.1.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.1.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.1.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.1.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.1.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.1.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.1.self_attn.out_proj.weight, 
backbone.text_model.model.text_model.encoder.layers.1.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.1.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.1.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.1.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.1.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.1.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.1.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.1.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.1.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.2.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.2.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.2.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.2.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.2.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.2.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.2.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.2.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.2.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.2.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.2.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.2.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.2.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.2.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.2.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.2.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.3.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.3.self_attn.k_proj.bias, 
backbone.text_model.model.text_model.encoder.layers.3.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.3.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.3.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.3.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.3.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.3.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.3.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.3.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.3.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.3.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.3.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.3.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.3.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.3.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.4.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.4.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.4.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.4.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.4.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.4.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.4.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.4.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.4.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.4.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.4.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.4.mlp.fc1.bias, 
backbone.text_model.model.text_model.encoder.layers.4.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.4.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.4.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.4.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.5.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.5.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.5.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.5.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.5.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.5.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.5.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.5.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.5.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.5.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.5.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.5.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.5.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.5.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.5.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.5.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.6.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.6.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.6.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.6.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.6.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.6.self_attn.q_proj.bias, 
backbone.text_model.model.text_model.encoder.layers.6.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.6.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.6.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.6.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.6.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.6.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.6.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.6.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.6.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.6.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.7.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.7.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.7.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.7.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.7.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.7.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.7.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.7.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.7.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.7.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.7.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.7.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.7.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.7.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.7.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.7.layer_norm2.bias, 
backbone.text_model.model.text_model.encoder.layers.8.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.8.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.8.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.8.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.8.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.8.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.8.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.8.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.8.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.8.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.8.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.8.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.8.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.8.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.8.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.8.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.9.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.9.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.9.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.9.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.9.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.9.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.9.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.9.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.9.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.9.layer_norm1.bias, 
backbone.text_model.model.text_model.encoder.layers.9.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.9.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.9.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.9.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.9.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.9.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.10.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.10.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.10.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.10.self_attn.v_proj.bias, backbone.text_model.model.text_model.encoder.layers.10.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.10.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.10.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.10.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.10.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.10.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.10.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.10.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.10.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.10.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.10.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.10.layer_norm2.bias, backbone.text_model.model.text_model.encoder.layers.11.self_attn.k_proj.weight, backbone.text_model.model.text_model.encoder.layers.11.self_attn.k_proj.bias, backbone.text_model.model.text_model.encoder.layers.11.self_attn.v_proj.weight, backbone.text_model.model.text_model.encoder.layers.11.self_attn.v_proj.bias, 
backbone.text_model.model.text_model.encoder.layers.11.self_attn.q_proj.weight, backbone.text_model.model.text_model.encoder.layers.11.self_attn.q_proj.bias, backbone.text_model.model.text_model.encoder.layers.11.self_attn.out_proj.weight, backbone.text_model.model.text_model.encoder.layers.11.self_attn.out_proj.bias, backbone.text_model.model.text_model.encoder.layers.11.layer_norm1.weight, backbone.text_model.model.text_model.encoder.layers.11.layer_norm1.bias, backbone.text_model.model.text_model.encoder.layers.11.mlp.fc1.weight, backbone.text_model.model.text_model.encoder.layers.11.mlp.fc1.bias, backbone.text_model.model.text_model.encoder.layers.11.mlp.fc2.weight, backbone.text_model.model.text_model.encoder.layers.11.mlp.fc2.bias, backbone.text_model.model.text_model.encoder.layers.11.layer_norm2.weight, backbone.text_model.model.text_model.encoder.layers.11.layer_norm2.bias, backbone.text_model.model.text_model.final_layer_norm.weight, backbone.text_model.model.text_model.final_layer_norm.bias, backbone.text_model.model.text_projection.weight
missing keys in source state_dict: embeddings
05/30 10:45:01 - mmengine - INFO - Load checkpoint from pretrained_models/yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth
This checkpoint has a text model, but in our case we don't. I couldn't understand how it does not give any error with your config. I tried with your config as well, but got the same result :(
@pelinsuacar Maybe to adjust
num_classes = 1 num_training_classes = 1
and see if the output is altered? But they are already set to 1.
I mean modify these two parameters.
@pelinsuacar Have you tried this? If output is different you can locate your problem. Use the checkpoint you used previously.
@pelinsuacar Maybe to adjust
num_classes = 1 num_training_classes = 1
and see if the output is altered? But they are already set to 1.
I mean modify these two parameters.
@pelinsuacar Have you tried this? If output is different you can locate your problem.
Yes, it did not change anything, because it checks the size of the embedding array that I provide ('embeddings/yoloworld_wheat_emb.npy'), which is (1,512) instead of (80,512).
@lin-whale so you were able to use this checkpoint "yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth" with your config that does not have any text model?
@pelinsuacar Maybe to adjust
num_classes = 1 num_training_classes = 1
and see if the output is altered? But they are already set to 1.
I mean modify these two parameters.
@pelinsuacar Have you tried this? If output is different you can locate your problem.
Yes, it did not change anything, because it checks the size of the embedding array that I provide ('embeddings/yoloworld_wheat_emb.npy'), which is (1,512) instead of (80,512).
@lin-whale so you were able to use this checkpoint "yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth" with your config that does not have any text model?
I add this line in config file. And add text model in the folder.
text_model_name = 'openai/clip-vit-base-patch32'
Access this here.
but isn't the important thing to define the model with text model also?
model = dict(type='YOLOWorldPromptDetector',
mm_neck=True,
num_train_classes=num_training_classes,
num_test_classes=num_classes,
embedding_path='embeddings/hospital_50.npy',
prompt_dim=text_channels,
num_prompts=50,
data_preprocessor=dict(type='YOLOv5DetDataPreprocessor'),
backbone=dict(_delete_=True,
type='MultiModalYOLOBackbone',
text_model=None,
image_model={{_base_.model.backbone}},
frozen_stages=4,
with_text_model=False),  # <---- So did you also change this part to True? @lin-whale
neck=dict(type='YOLOWorldPAFPN',
freeze_all=True,
guide_channels=text_channels,
embed_channels=neck_embed_channels,
num_heads=neck_num_heads,
block_cfg=dict(type='MaxSigmoidCSPLayerWithTwoConv')),
bbox_head=dict(type='YOLOWorldHead',
head_module=dict(
type='YOLOWorldHeadModule',
freeze_all=True,
use_bn_head=True,
embed_dims=text_channels,
num_classes=num_training_classes)),
train_cfg=dict(assigner=dict(num_classes=num_training_classes)))
@pelinsuacar Sure. Here is full config file.
# MMEngine/MMYOLO config: prompt-tuning YOLO-World v2-L (with a frozen CLIP
# text encoder) on a 50-class custom hospital-scene dataset.
# Inherits the YOLOv8-L mask-refine training schedule from mmyolo.
_base_ = (
    '../../third_party/mmyolo/configs/yolov8/'
    'yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py')
# Register the yolo_world package so its custom module types resolve by name.
custom_imports = dict(
    imports=['yolo_world'],
    allow_failed_imports=False)
# hyper-parameters
num_classes = 50            # number of classes at val/test time
num_training_classes = 50   # max text prompts sampled per image during training
max_epochs = 80  # Maximum training epochs
close_mosaic_epochs = 10    # mosaic/mixup disabled for the final N epochs
save_epoch_intervals = 5
text_channels = 512         # CLIP ViT-B/32 text embedding width
neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2]
neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32]
base_lr = 2e-4
weight_decay = 0.05
train_batch_size_per_gpu = 64
# Open-vocabulary pre-trained checkpoint to fine-tune from.
load_from = 'pretrained_models/yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth'
# text_model_name = '../pretrained_models/clip-vit-base-patch32-projection'
text_model_name = 'openai/clip-vit-base-patch32'  # resolved via HuggingFace hub
persistent_workers = False
# model settings
model = dict(
    type='YOLOWorldDetector',   # text-conditioned detector (keeps the CLIP text branch)
    mm_neck=True,               # multi-modal neck: image features guided by text embeddings
    num_train_classes=num_training_classes,
    num_test_classes=num_classes,
    data_preprocessor=dict(type='YOLOWDetDataPreprocessor'),
    backbone=dict(
        _delete_=True,          # replace the base YOLOv8 backbone config wholesale
        type='MultiModalYOLOBackbone',
        image_model={{_base_.model.backbone}},
        text_model=dict(
            type='HuggingCLIPLanguageBackbone',
            model_name=text_model_name,
            frozen_modules=['all'])),   # CLIP text encoder fully frozen
    neck=dict(type='YOLOWorldPAFPN',
              guide_channels=text_channels,
              embed_channels=neck_embed_channels,
              num_heads=neck_num_heads,
              block_cfg=dict(type='MaxSigmoidCSPLayerWithTwoConv')),
    bbox_head=dict(type='YOLOWorldHead',
                   head_module=dict(type='YOLOWorldHeadModule',
                                    use_bn_head=True,
                                    embed_dims=text_channels,
                                    num_classes=num_training_classes)),
    train_cfg=dict(assigner=dict(num_classes=num_training_classes)))
# dataset settings
text_transform = [
    # Sample class texts per image; pad with '' up to max_num_samples.
    dict(type='RandomLoadText',
         num_neg_samples=(num_classes, num_classes),
         max_num_samples=num_training_classes,
         padding_to_max=True,
         padding_value=''),
    dict(type='mmdet.PackDetInputs',
         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
                    'flip_direction', 'texts'))
]
mosaic_affine_transform = [
    dict(
        type='MultiModalMosaic',
        img_scale=_base_.img_scale,
        pad_val=114.0,
        pre_transform=_base_.pre_transform),
    dict(type='YOLOv5CopyPaste', prob=_base_.copypaste_prob),
    dict(
        type='YOLOv5RandomAffine',
        max_rotate_degree=0.0,
        max_shear_degree=0.0,
        max_aspect_ratio=100.,
        scaling_ratio_range=(1 - _base_.affine_scale,
                             1 + _base_.affine_scale),
        # img_scale is (width, height)
        border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2),
        border_val=(114, 114, 114),
        min_area_ratio=_base_.min_area_ratio,
        use_mask_refine=_base_.use_mask2refine)
]
train_pipeline = [
    *_base_.pre_transform,
    *mosaic_affine_transform,
    dict(
        type='YOLOv5MultiModalMixUp',
        prob=_base_.mixup_prob,
        pre_transform=[*_base_.pre_transform,
                       *mosaic_affine_transform]),
    # Drop the base pipeline's final packing step; append the text-aware one.
    *_base_.last_transform[:-1],
    *text_transform
]
train_pipeline_stage2 = [
    *_base_.train_pipeline_stage2[:-1],
    *text_transform
]
# 50 custom class names; order is assumed to match the COCO-format category
# ids in the annotation files — TODO confirm against the json.
meta_info = dict(classes = ('floor', 'person', 'paper', 'bottle', 'paper cup', 'mask', 'thread', 'waiting bench', 'sturdy', 'plastic bag', 'table', 'packaging bag', 'door', 'carton box', 'sticker', 'screen', 'book', 'cotton ball', 'warning sign', 'rod', 'poster rack', 'vomit', 'blood', 'traffic cone', 'trash can', 'cart', 'rack', 'bag', 'flowerpot', 'medication', 'paper box', 'meal box', 'pericarp', 'hat', 'umbrella', 'drip stand', 'coffee stains', 'elevator entrance', 'escalator entrance', 'triage desk', 'registration machine', 'fire hydrant', 'hospital bed', 'milk stains', 'plinth', 'chair', 'wheel chair', 'swab', 'drinking cup', 'fallen leaves'))
coco_train_dataset = dict(
    _delete_=True,
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        metainfo = meta_info,
        # data_root='/data/cvat/train/2024-04-24-ann-cvat',
        # ann_file='/data/cvat/train/2024-04-24-ann-cvat/annotations/annotations_train.json',
        data_root='/data/cvat/train/bottle_train0429/2024-04-29-ann-cvat',
        ann_file='/data/cvat/train/bottle_train0429/2024-04-29-ann-cvat/annotations/annotations.json',
        data_prefix=dict(img='images/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path='data/texts/hospital_class_texts.json',
    pipeline=train_pipeline)
train_dataloader = dict(
    persistent_workers=persistent_workers,
    batch_size=train_batch_size_per_gpu,
    collate_fn=dict(type='yolow_collate'),
    dataset=coco_train_dataset)
test_pipeline = [
    *_base_.test_pipeline[:-1],
    dict(type='LoadText'),   # load the fixed class texts at eval time
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param', 'texts'))
]
val_dataroot = "/data/cvat/train/bottle_test0429/2024-04-29-ann-cvat"
val_ann_file = "/data/cvat/train/bottle_test0429/2024-04-29-ann-cvat/annotations/annotations.json"
coco_val_dataset = dict(
    _delete_=True,
    type='MultiModalDataset',
    dataset=dict(
        type='YOLOv5CocoDataset',
        metainfo = meta_info,
        # data_root='/data/cvat/train/bottle/2024-04-28-ann-cvat',
        # ann_file='/data/cvat/train/bottle/2024-04-28-ann-cvat/annotations/annotations.json',
        data_root=val_dataroot,
        ann_file=val_ann_file,
        data_prefix=dict(img='images/'),
        filter_cfg=dict(filter_empty_gt=False, min_size=32)),
    class_text_path='data/texts/hospital_class_texts.json',
    pipeline=test_pipeline)
val_dataloader = dict(
    persistent_workers=persistent_workers,
    # NOTE(review): validation reuses the training batch size (64); the
    # reference prompt-tuning configs validate with batch size 1 — confirm
    # this is not the cause of the failing validation phase.
    batch_size=train_batch_size_per_gpu,
    dataset=coco_val_dataset)
test_dataloader = val_dataloader
# training settings
default_hooks = dict(
    param_scheduler=dict(
        scheduler_type='linear',
        lr_factor=0.01,
        max_epochs=max_epochs),
    checkpoint=dict(
        max_keep_ckpts=-1,   # keep every saved checkpoint
        save_best="coco/bbox_mAP_50",
        interval=save_epoch_intervals))
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49),
    # Switch to the mosaic-free pipeline for the last close_mosaic_epochs.
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - close_mosaic_epochs,
        switch_pipeline=train_pipeline_stage2)
]
train_cfg = dict(
    max_epochs=max_epochs,
    val_interval=5,
    dynamic_intervals=[((max_epochs - close_mosaic_epochs),
                        _base_.val_interval_stage2)])
optim_wrapper = dict(
    optimizer=dict(
        _delete_=True,
        type='AdamW',
        lr=base_lr,
        weight_decay=weight_decay,
        batch_size_per_gpu=train_batch_size_per_gpu),
    paramwise_cfg=dict(
        # Text-model params get a reduced lr multiplier; logit_scale is
        # excluded from weight decay.
        custom_keys={'backbone.text_model': dict(lr_mult=0.01),
                     'logit_scale': dict(weight_decay=0.0)}),
    constructor='YOLOWv5OptimizerConstructor')
# evaluation settings
val_evaluator = dict(
    _delete_=True,
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
    # ann_file='/data/cvat/train/bottle/2024-04-28-ann-cvat/annotations/annotations.json',
    ann_file=val_ann_file,
    metric='bbox',
    classwise=True)
# test settings
test_evaluator = dict(
    _delete_=True,
    type='mmdet.CocoMetric',
    proposal_nums=(100, 1, 10),
    ann_file=val_ann_file,
    # ann_file='/data/cvat/train/bottle/2024-04-28-ann-cvat/annotations/annotations.json',
    metric='bbox',
    classwise=True)
@pelinsuacar Sure. Here is full config file.
_base_ = ( '../../third_party/mmyolo/configs/yolov8/' 'yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py') custom_imports = dict( imports=['yolo_world'], allow_failed_imports=False) # hyper-parameters num_classes = 50 num_training_classes = 50 max_epochs = 80 # Maximum training epochs close_mosaic_epochs = 10 save_epoch_intervals = 5 text_channels = 512 neck_embed_channels = [128, 256, _base_.last_stage_out_channels // 2] neck_num_heads = [4, 8, _base_.last_stage_out_channels // 2 // 32] base_lr = 2e-4 weight_decay = 0.05 train_batch_size_per_gpu = 64 load_from = 'pretrained_models/yolo_world_v2_l_obj365v1_goldg_cc3mlite_pretrain-ca93cd1f.pth' # text_model_name = '../pretrained_models/clip-vit-base-patch32-projection' text_model_name = 'openai/clip-vit-base-patch32' persistent_workers = False # model settings model = dict( type='YOLOWorldDetector', mm_neck=True, num_train_classes=num_training_classes, num_test_classes=num_classes, data_preprocessor=dict(type='YOLOWDetDataPreprocessor'), backbone=dict( _delete_=True, type='MultiModalYOLOBackbone', image_model={{_base_.model.backbone}}, text_model=dict( type='HuggingCLIPLanguageBackbone', model_name=text_model_name, frozen_modules=['all'])), neck=dict(type='YOLOWorldPAFPN', guide_channels=text_channels, embed_channels=neck_embed_channels, num_heads=neck_num_heads, block_cfg=dict(type='MaxSigmoidCSPLayerWithTwoConv')), bbox_head=dict(type='YOLOWorldHead', head_module=dict(type='YOLOWorldHeadModule', use_bn_head=True, embed_dims=text_channels, num_classes=num_training_classes)), train_cfg=dict(assigner=dict(num_classes=num_training_classes))) # dataset settings text_transform = [ dict(type='RandomLoadText', num_neg_samples=(num_classes, num_classes), max_num_samples=num_training_classes, padding_to_max=True, padding_value=''), dict(type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction', 'texts')) ] mosaic_affine_transform = [ dict( type='MultiModalMosaic', 
img_scale=_base_.img_scale, pad_val=114.0, pre_transform=_base_.pre_transform), dict(type='YOLOv5CopyPaste', prob=_base_.copypaste_prob), dict( type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, max_aspect_ratio=100., scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale), # img_scale is (width, height) border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2), border_val=(114, 114, 114), min_area_ratio=_base_.min_area_ratio, use_mask_refine=_base_.use_mask2refine) ] train_pipeline = [ *_base_.pre_transform, *mosaic_affine_transform, dict( type='YOLOv5MultiModalMixUp', prob=_base_.mixup_prob, pre_transform=[*_base_.pre_transform, *mosaic_affine_transform]), *_base_.last_transform[:-1], *text_transform ] train_pipeline_stage2 = [ *_base_.train_pipeline_stage2[:-1], *text_transform ] meta_info = dict(classes = ('floor', 'person', 'paper', 'bottle', 'paper cup', 'mask', 'thread', 'waiting bench', 'sturdy', 'plastic bag', 'table', 'packaging bag', 'door', 'carton box', 'sticker', 'screen', 'book', 'cotton ball', 'warning sign', 'rod', 'poster rack', 'vomit', 'blood', 'traffic cone', 'trash can', 'cart', 'rack', 'bag', 'flowerpot', 'medication', 'paper box', 'meal box', 'pericarp', 'hat', 'umbrella', 'drip stand', 'coffee stains', 'elevator entrance', 'escalator entrance', 'triage desk', 'registration machine', 'fire hydrant', 'hospital bed', 'milk stains', 'plinth', 'chair', 'wheel chair', 'swab', 'drinking cup', 'fallen leaves')) coco_train_dataset = dict( _delete_=True, type='MultiModalDataset', dataset=dict( type='YOLOv5CocoDataset', metainfo = meta_info, # data_root='/data/cvat/train/2024-04-24-ann-cvat', # ann_file='/data/cvat/train/2024-04-24-ann-cvat/annotations/annotations_train.json', data_root='/data/cvat/train/bottle_train0429/2024-04-29-ann-cvat', ann_file='/data/cvat/train/bottle_train0429/2024-04-29-ann-cvat/annotations/annotations.json', data_prefix=dict(img='images/'), filter_cfg=dict(filter_empty_gt=False, 
min_size=32)), class_text_path='data/texts/hospital_class_texts.json', pipeline=train_pipeline) train_dataloader = dict( persistent_workers=persistent_workers, batch_size=train_batch_size_per_gpu, collate_fn=dict(type='yolow_collate'), dataset=coco_train_dataset) test_pipeline = [ *_base_.test_pipeline[:-1], dict(type='LoadText'), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'scale_factor', 'pad_param', 'texts')) ] val_dataroot = "/data/cvat/train/bottle_test0429/2024-04-29-ann-cvat" val_ann_file = "/data/cvat/train/bottle_test0429/2024-04-29-ann-cvat/annotations/annotations.json" coco_val_dataset = dict( _delete_=True, type='MultiModalDataset', dataset=dict( type='YOLOv5CocoDataset', metainfo = meta_info, # data_root='/data/cvat/train/bottle/2024-04-28-ann-cvat', # ann_file='/data/cvat/train/bottle/2024-04-28-ann-cvat/annotations/annotations.json', data_root=val_dataroot, ann_file=val_ann_file, data_prefix=dict(img='images/'), filter_cfg=dict(filter_empty_gt=False, min_size=32)), class_text_path='data/texts/hospital_class_texts.json', pipeline=test_pipeline) val_dataloader = dict( persistent_workers=persistent_workers, batch_size=train_batch_size_per_gpu, dataset=coco_val_dataset) test_dataloader = val_dataloader # training settings default_hooks = dict( param_scheduler=dict( scheduler_type='linear', lr_factor=0.01, max_epochs=max_epochs), checkpoint=dict( max_keep_ckpts=-1, save_best="coco/bbox_mAP_50", interval=save_epoch_intervals)) custom_hooks = [ dict( type='EMAHook', ema_type='ExpMomentumEMA', momentum=0.0001, update_buffers=True, strict_load=False, priority=49), dict( type='mmdet.PipelineSwitchHook', switch_epoch=max_epochs - close_mosaic_epochs, switch_pipeline=train_pipeline_stage2) ] train_cfg = dict( max_epochs=max_epochs, val_interval=5, dynamic_intervals=[((max_epochs - close_mosaic_epochs), _base_.val_interval_stage2)]) optim_wrapper = dict( optimizer=dict( _delete_=True, type='AdamW', lr=base_lr, 
weight_decay=weight_decay, batch_size_per_gpu=train_batch_size_per_gpu), paramwise_cfg=dict( custom_keys={'backbone.text_model': dict(lr_mult=0.01), 'logit_scale': dict(weight_decay=0.0)}), constructor='YOLOWv5OptimizerConstructor') # evaluation settings val_evaluator = dict( _delete_=True, type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), # ann_file='/data/cvat/train/bottle/2024-04-28-ann-cvat/annotations/annotations.json', ann_file=val_ann_file, metric='bbox', classwise=True) # test settings test_evaluator = dict( _delete_=True, type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), ann_file=val_ann_file, # ann_file='/data/cvat/train/bottle/2024-04-28-ann-cvat/annotations/annotations.json', metric='bbox', classwise=True)
oh okay I want to get rid of the language model by giving image embeddings of each class instead of class names as text. Since at the beginning you were also using embeddings, I thought it is still the same but now I see that you are using YOLOWorldDetector instead of SimpleYOLOWorldDetector
model = dict(type='YOLOWorldPromptDetector',请问您的这个检测器是自己定义的吗
您好,YOLOWorldPromptDetector这个版本已经被弃用了,我的代码中没有这个。您可以传我一份那个yolo_world.py看一下吗?
Training is successful, but the performance is not improved, and the validation phase failed. The config is below, which is modified from configs/prompt_tuning_coco/yolo_world_v2_l_vlpan_bn_2e-4_80e_8gpus_mask-refine_prompt_tuning_coco.py