Open Charles-AntoineLaurin opened 3 months ago
That's what I tried, but I kept getting errors. The output of the anchor optimisation is an array of arrays, not an array of tuples ([[15, 21], [26, 30], [27, 40], [35, 68], [39, 82], [151, 44], [72, 131], [1011, 35], [462, 558]]), so when I plug it into my config I get an "index out of bounds" error... I don't know if I need to change the inner arrays into tuples, but that would be weird since it's the output given by the script...
Here are the logs, including the config and the error message.
System environment: sys.platform: win32 Python: 3.9.19 (main, May 6 2024, 20:12:36) [MSC v.1916 64 bit (AMD64)] CUDA available: True MUSA available: False numpy_random_seed: 1256422072 GPU 0: NVIDIA GeForce RTX 4070 Ti SUPER CUDA_HOME: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1 NVCC: Cuda compilation tools, release 12.1, V12.1.105 MSVC: Compilateur d'optimisation Microsoft (R) C/C++ version 19.40.33811 pour x64 GCC: n/a PyTorch: 2.3.0 PyTorch compiling details: PyTorch built with:
Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=12.1, CUDNN_VERSION=8.8.1, CXX_COMPILER=C:/cb/pytorch_1000000000000/work/tmp_bin/sccache-cl.exe, CXX_FLAGS=/DWIN32 /D_WINDOWS /GR /EHsc /Zc:__cplusplus /bigobj /FS /utf-8 -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOCUPTI -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE /wd4624 /wd4068 /wd4067 /wd4267 /wd4661 /wd4717 /wd4244 /wd4804 /wd4273, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=2.3.0, USE_CUDA=ON, USE_CUDNN=ON, USE_CUSPARSELT=OFF, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_GLOO=ON, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=OFF, USE_NNPACK=OFF, USE_OPENMP=ON, USE_ROCM=OFF, USE_ROCM_KERNEL_ASSERT=OFF,
TorchVision: 0.18.0 OpenCV: 4.10.0 MMEngine: 0.10.4
08/22 09:24:31 - mmengine - INFO - Config:
auto_scale_lr = dict(base_batch_size=64, enable=False)
backend_args = None
classes = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' }
data = dict(
test=dict(pipeline=[
dict(type='LoadImageFromFile'),
dict(
flip=False,
img_scale=(
320,
320,
),
transforms=[
dict(keep_ratio=True, type='Resize'),
dict(type='RandomFlip'),
dict(
mean=[
0,
0,
0,
],
std=[
255.0,
255.0,
255.0,
],
to_rgb=True,
type='Normalize'),
dict(size_divisor=32, type='Pad'),
dict(keys=[
'img',
], type='ImageToTensor'),
dict(keys=[
'img',
], type='Collect'),
],
type='MultiScaleFlipAug'),
]),
train=dict(pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
mean=[
0,
0,
0,
], ratio_range=(
1,
2,
), to_rgb=True, type='Expand'),
dict(
min_crop_size=0.3,
min_ious=(
0.4,
0.5,
0.6,
0.7,
0.8,
0.9,
),
type='MinIoURandomCrop'),
dict(img_scale=(
320,
320,
), keep_ratio=True, type='Resize'),
dict(flip_ratio=0.1, type='RandomFlip'),
dict(type='PhotoMetricDistortion'),
dict(
mean=[
0,
0,
0,
],
std=[
255.0,
255.0,
255.0,
],
to_rgb=True,
type='Normalize'),
dict(size_divisor=32, type='Pad'),
dict(type='DefaultFormatBundle'),
dict(keys=[
'img',
'gt_bboxes',
'gt_labels',
], type='Collect'),
]),
val=dict(pipeline=[
dict(type='LoadImageFromFile'),
dict(
flip=False,
img_scale=(
320,
320,
),
transforms=[
dict(keep_ratio=True, type='Resize'),
dict(type='RandomFlip'),
dict(
mean=[
0,
0,
0,
],
std=[
255.0,
255.0,
255.0,
],
to_rgb=True,
type='Normalize'),
dict(size_divisor=32, type='Pad'),
dict(keys=[
'img',
], type='ImageToTensor'),
dict(keys=[
'img',
], type='Collect'),
],
type='MultiScaleFlipAug'),
]))
data_preprocessor = dict(
bgr_to_rgb=True,
mean=[
0,
0,
0,
],
pad_size_divisor=32,
std=[
255.0,
255.0,
255.0,
],
type='DetDataPreprocessor')
data_root = './data'
dataset_type = 'CocoDataset'
default_hooks = dict(
checkpoint=dict(
interval=5, max_keep_ckpts=5, save_best='auto', type='CheckpointHook'),
logger=dict(interval=50, type='LoggerHook'),
param_scheduler=dict(type='ParamSchedulerHook'),
sampler_seed=dict(type='DistSamplerSeedHook'),
timer=dict(type='IterTimerHook'),
visualization=dict(type='DetVisualizationHook'))
default_scope = 'mmdet'
env_cfg = dict(
cudnn_benchmark=False,
dist_cfg=dict(backend='nccl'),
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
img_norm_cfg = dict(
mean=[
0,
0,
0,
], std=[
255.0,
255.0,
255.0,
], to_rgb=True)
launcher = 'none'
load_from = './previous_run.pth'
log_level = 'INFO'
log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
model = dict(
backbone=dict(
depth=53,
init_cfg=dict(checkpoint='open-mmlab://darknet53', type='Pretrained'),
out_indices=(
3,
4,
5,
),
type='Darknet'),
bbox_head=dict(
anchor_generator=dict(
base_sizes=[
(
15,
21,
),
(
26,
30,
),
(
27,
40,
),
(
35,
68,
),
(
39,
82,
),
(
151,
44,
),
(
72,
131,
),
(
1011,
35,
),
(
462,
558,
),
],
strides=[
32,
16,
8,
],
type='YOLOAnchorGenerator'),
bbox_coder=dict(type='YOLOBBoxCoder'),
featmap_strides=[
32,
16,
8,
],
in_channels=[
512,
256,
128,
],
loss_cls=dict(
loss_weight=1.0,
reduction='sum',
type='CrossEntropyLoss',
use_sigmoid=True),
loss_conf=dict(
loss_weight=1.0,
reduction='sum',
type='CrossEntropyLoss',
use_sigmoid=True),
loss_wh=dict(loss_weight=2.0, reduction='sum', type='MSELoss'),
loss_xy=dict(
loss_weight=2.0,
reduction='sum',
type='CrossEntropyLoss',
use_sigmoid=True),
num_classes=8,
out_channels=[
1024,
512,
256,
],
type='YOLOV3Head'),
data_preprocessor=dict(
bgr_to_rgb=True,
mean=[
0,
0,
0,
],
pad_size_divisor=32,
std=[
255.0,
255.0,
255.0,
],
type='DetDataPreprocessor'),
neck=dict(
in_channels=[
1024,
512,
256,
],
num_scales=3,
out_channels=[
512,
256,
128,
],
type='YOLOV3Neck'),
test_cfg=dict(
conf_thr=0.005,
max_per_img=100,
min_bbox_size=0,
nms=dict(iou_threshold=0.45, type='nms'),
nms_pre=1000,
score_thr=0.05),
train_cfg=dict(
assigner=dict(
min_pos_iou=0,
neg_iou_thr=0.5,
pos_iou_thr=0.5,
type='GridAssigner')),
type='YOLOV3')
optim_wrapper = dict(
clip_grad=dict(max_norm=35, norm_type=2),
optimizer=dict(lr=0.001, momentum=0.9, type='SGD', weight_decay=0.0005),
type='OptimWrapper')
param_scheduler = [
dict(begin=0, by_epoch=False, end=2000, start_factor=0.1, type='LinearLR'),
dict(
by_epoch=True, gamma=0.1, milestones=[
218,
246,
], type='MultiStepLR'),
]
resume = False
test_cfg = dict(type='TestLoop')
test_dataloader = dict(
batch_size=1,
dataset=dict(
ann_file=
'./data/TestingData/result.json',
backend_args=None,
data_prefix=dict(
img=
'./data/TestingData/images'
),
data_root=
'./data/TestingData/',
metainfo=dict(
classes={ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'}
),
pipeline=[
dict(backend_args=None, type='LoadImageFromFile'),
dict(keep_ratio=True, scale=(
608,
608,
), type='Resize'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
meta_keys=(
'img_id',
'img_path',
'ori_shape',
'img_shape',
'scale_factor',
),
type='PackDetInputs'),
],
test_mode=True,
type='CocoDataset'),
drop_last=False,
num_workers=2,
persistent_workers=True,
sampler=dict(shuffle=False, type='DefaultSampler'))
test_evaluator = dict(
ann_file=
'./data/TestingData/result.json',
backend_args=None,
metric='bbox',
type='CocoMetric')
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
flip=False,
img_scale=(
320,
320,
),
transforms=[
dict(keep_ratio=True, type='Resize'),
dict(type='RandomFlip'),
dict(
mean=[
0,
0,
0,
],
std=[
255.0,
255.0,
255.0,
],
to_rgb=True,
type='Normalize'),
dict(size_divisor=32, type='Pad'),
dict(keys=[
'img',
], type='ImageToTensor'),
dict(keys=[
'img',
], type='Collect'),
],
type='MultiScaleFlipAug'),
]
train_cfg = dict(max_epochs=273, type='EpochBasedTrainLoop', val_interval=5)
train_dataloader = dict(
batch_sampler=dict(type='AspectRatioBatchSampler'),
batch_size=8,
dataset=dict(
ann_file=
'./data/TrainingData/result.json',
backend_args=None,
data_prefix=dict(
img=
'./data/TrainingData/images'
),
data_root=
'./data/TrainingData',
filter_cfg=dict(filter_empty_gt=True, min_size=32),
metainfo=dict(
classes={
'a', 'b', 'c', 'd','e', 'f', 'g', 'h'
}),
pipeline=[
dict(backend_args=None, type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
mean=[
0,
0,
0,
],
ratio_range=(
1,
2,
),
to_rgb=True,
type='Expand'),
dict(
min_crop_size=0.3,
min_ious=(
0.4,
0.5,
0.6,
0.7,
0.8,
0.9,
),
type='MinIoURandomCrop'),
dict(
keep_ratio=True,
scale=[
(
320,
320,
),
(
608,
608,
),
],
type='RandomResize'),
dict(prob=0.1, type='RandomFlip'),
dict(type='PhotoMetricDistortion'),
dict(type='PackDetInputs'),
],
type='CocoDataset'),
num_workers=4,
persistent_workers=True,
sampler=dict(shuffle=True, type='DefaultSampler'))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(mean=[
0,
0,
0,
], ratio_range=(
1,
2,
), to_rgb=True, type='Expand'),
dict(
min_crop_size=0.3,
min_ious=(
0.4,
0.5,
0.6,
0.7,
0.8,
0.9,
),
type='MinIoURandomCrop'),
dict(img_scale=(
320,
320,
), keep_ratio=True, type='Resize'),
dict(flip_ratio=0.1, type='RandomFlip'),
dict(type='PhotoMetricDistortion'),
dict(
mean=[
0,
0,
0,
],
std=[
255.0,
255.0,
255.0,
],
to_rgb=True,
type='Normalize'),
dict(size_divisor=32, type='Pad'),
dict(type='DefaultFormatBundle'),
dict(keys=[
'img',
'gt_bboxes',
'gt_labels',
], type='Collect'),
]
val_cfg = dict(type='ValLoop')
val_dataloader = dict(
batch_size=1,
dataset=dict(
ann_file=
'./data/ValidationData/result.json',
backend_args=None,
data_prefix=dict(
img=
'./data/ValidationData/images'
),
data_root=
'./data/ValidationData/',
metainfo=dict(
classes={
'a', 'b', 'c', 'd',
'e', 'f', 'g', 'h'
}),
pipeline=[
dict(backend_args=None, type='LoadImageFromFile'),
dict(keep_ratio=True, scale=(
608,
608,
), type='Resize'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
meta_keys=(
'img_id',
'img_path',
'ori_shape',
'img_shape',
'scale_factor',
),
type='PackDetInputs'),
],
test_mode=True,
type='CocoDataset'),
drop_last=False,
num_workers=2,
persistent_workers=True,
sampler=dict(shuffle=False, type='DefaultSampler'))
val_evaluator = dict(
ann_file=
'./data/ValidationData/result.json',
backend_args=None,
metric='bbox',
type='CocoMetric')
vis_backends = [
dict(type='LocalVisBackend'),
]
visualizer = dict(
name='visualizer',
type='DetLocalVisualizer',
vis_backends=[
dict(type='LocalVisBackend'),
])
work_dir = './workdir/new_anchor'
Traceback (most recent call last):
File "MMDETECTION_ROOT\tools\train.py", line 124, in
Just found out that you need to manually add the outer array brackets and convert the inner arrays to tuples... Can anyone confirm that this is the right way to do it? And if so, would it be possible to add the missing array brackets to the output of the script?
虽然没实际用过,应该是要换成元组的,但是你img_scale 608 聚类出来的anchor看起来数值不对啊,有1011, 你确定不是你复制错吗base_sizes=[ ( 15, 21, ), ( 26, 30, ), ( 27, 40, ), ( 35, 68, ), ( 39, 82, ), ( 151, 44, ), ( 72, 131, ), ( 1011, #? 35, ), ( 462, 558, ), ], strides=[ 32, 16, 8, ], type='YOLOAnchorGenerator'),
只是发现您需要手动添加数组并将另一个转换为元组...有人可以确认这是正确的方法吗?如果是这样,是否可以在脚本的输出中添加缺少的数组括号。
是的,按照原来的config格式改成内嵌元组的形式
Hi everyone,
I'm working on a project that uses a YOLOv3 model. I was trying to find ways to optimize it and saw that it is possible to optimize the YOLO anchors: https://mmdetection.readthedocs.io/en/v2.17.0/useful_tools.html#yolo-anchor-optimization
The thing is, I don't know how to use the output array given by the script. I assume I must insert it somewhere in my config, but I can't find the right place to put it. I don't know if I misread or missed the documentation about it, but I can't seem to find anything on how to properly use the information given by the script.
Could someone give me some indication of how I should use it?