Open guishilike opened 2 years ago
Training for all my networks takes < 2 hours.
You failed to tell us:
1) What configuration. 2) What network dimensions. 3) What hardware you are using. 4) How or if you resized your images.
See: https://www.ccoderun.ca/programming/darknet_faq/#time_to_train
Network: yolov4-csp-swish.cfg at 640x640. Images: the originals are 8 megapixels, so they are scaled down a lot. GPU: RTX 3070 8 GB (GPU memory is only 8 GB, so I set batch=64, subdivisions=32). Cfg:
[net]
# Global settings for the whole network. Keys are grouped by purpose below;
# every value is unchanged.
# --- Batch settings for testing (disabled) ---
#batch=1
#subdivisions=1
# --- Batch settings for training ---
# NOTE(review): batch / subdivisions = 64 / 32 = 2; presumably that is the
# number of images processed per GPU pass - confirm against the Darknet docs.
batch=64
subdivisions=32
# --- Network input size ---
width=640
height=640
channels=3
# --- Optimizer ---
momentum=0.949
decay=0.0005
# --- Learning-rate schedule ---
# The two step points, 9600 and 10800, are 80% and 90% of max_batches.
learning_rate=0.0013
burn_in=1000
max_batches=12000
policy=steps
steps=9600,10800
scales=.1,.1
# --- Image handling (presumably augmentation-related; see the Darknet docs) ---
angle=0
saturation=1.5
exposure=1.5
hue=.1
mosaic=1
letter_box=1
# --- Other ---
ema_alpha=0.9998
#optimized_memory=1
# ============ Backbone ============ #
# Layer indices in the comments below count sections from 0, starting with
# the first section after [net]. Routes and shortcuts refer to layers by
# these indices, so adding or removing a section shifts every later reference.
# Stem
# 0
# 3x3, stride 1, 32 filters.
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=swish
# P1
# Downsample
# 1: 3x3, stride 2, 64 filters.
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=swish
# Residual Block
# 2: 1x1, 32 filters.
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=swish
# 3: 3x3, 64 filters.
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=swish
# 4 (previous+1+3k)
# from=-3 points at layer 1, the downsample output that feeds this block.
# NOTE(review): k in the formula is presumably the number of residual blocks
# in the stage (1 here) - confirm.
[shortcut]
from=-3
activation=linear
# P2
# Stage layout (layers 5-17): one stride-2 downsample, two 1x1 branches taken
# from it, two residual blocks on the second branch, then a merge of both.
# Downsample
# 5: 3x3, stride 2, 128 filters.
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=swish
# Split
# 6: first branch, 1x1, 64 filters (picked up again by the merge at layer 16).
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=swish
# 7: -2 points back at layer 5, so the second branch also starts from the
# downsample output.
[route]
layers = -2
# 8: second branch, 1x1, 64 filters.
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=swish
# Residual Block
# Residual block 1 (layers 9-11).
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=swish
# 11: from=-3 points at layer 8, the input of this residual block.
[shortcut]
from=-3
activation=linear
# Residual block 2 (layers 12-14).
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=swish
# 14: from=-3 points at layer 11, the output of residual block 1.
[shortcut]
from=-3
activation=linear
# Transition first
# 15: 1x1, 64 filters.
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=swish
# Merge [-1, -(3k+4)]
# 16: combines layer 15 (-1) with layer 6 (-10), the first branch.
# With k=2 residual blocks, 3k+4 = 10.
[route]
layers = -1,-10
# Transition last
# 17 (previous+7+3k)
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# P3
# Stage layout (layers 18-48): one stride-2 downsample, two 1x1 branches taken
# from it, eight residual blocks on the second branch, then a merge of both.
# Layer 48, the stage output, is referenced by absolute index from the FPN-3
# part of the neck.
# Downsample
# 18: 3x3, stride 2, 256 filters.
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=swish
# Split
# 19: first branch, 1x1, 128 filters (picked up again by the merge at layer 47).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# 20: -2 points back at layer 18, the downsample output.
[route]
layers = -2
# 21: second branch, 1x1, 128 filters.
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# Residual Block
# Each residual block is a 1x1 conv, a 3x3 conv and a shortcut whose from=-3
# points at the block's own input.
# Residual block 1 (layers 22-24).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 2 (layers 25-27).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 3 (layers 28-30).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 4 (layers 31-33).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 5 (layers 34-36).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 6 (layers 37-39).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 7 (layers 40-42).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 8 (layers 43-45).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Transition first
# 46: 1x1, 128 filters.
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# Merge [-1 -(4+3k)]
# 47: combines layer 46 (-1) with layer 19 (-28), the first branch.
# With k=8 residual blocks, 4+3k = 28.
[route]
layers = -1,-28
# Transition last
# 48 (previous+7+3k)
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# P4
# Stage layout (layers 49-79): one stride-2 downsample, two 1x1 branches taken
# from it, eight residual blocks on the second branch, then a merge of both.
# Layer 79, the stage output, is referenced by absolute index from the FPN-4
# part of the neck.
# Downsample
# 49: 3x3, stride 2, 512 filters.
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=swish
# Split
# 50: first branch, 1x1, 256 filters (picked up again by the merge at layer 78).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# 51: -2 points back at layer 49, the downsample output.
[route]
layers = -2
# 52: second branch, 1x1, 256 filters.
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# Residual Block
# Each residual block is a 1x1 conv, a 3x3 conv and a shortcut whose from=-3
# points at the block's own input.
# Residual block 1 (layers 53-55).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 2 (layers 56-58).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 3 (layers 59-61).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 4 (layers 62-64).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 5 (layers 65-67).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 6 (layers 68-70).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 7 (layers 71-73).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 8 (layers 74-76).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Transition first
# 77: 1x1, 256 filters.
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# Merge [-1 -(3k+4)]
# 78: combines layer 77 (-1) with layer 50 (-28), the first branch.
# With k=8 residual blocks, 3k+4 = 28.
[route]
layers = -1,-28
# Transition last
# 79 (previous+7+3k)
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
# P5
# Stage layout (layers 80-98): one stride-2 downsample, two 1x1 branches taken
# from it, four residual blocks on the second branch, then a merge of both.
# Downsample
# 80: 3x3, stride 2, 1024 filters.
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=swish
# Split
# 81: first branch, 1x1, 512 filters (picked up again by the merge at layer 97).
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
# 82: -2 points back at layer 80, the downsample output.
[route]
layers = -2
# 83: second branch, 1x1, 512 filters.
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
# Residual Block
# Each residual block is a 1x1 conv, a 3x3 conv and a shortcut whose from=-3
# points at the block's own input.
# Residual block 1 (layers 84-86).
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 2 (layers 87-89).
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 3 (layers 90-92).
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Residual block 4 (layers 93-95).
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=swish
[shortcut]
from=-3
activation=linear
# Transition first
# 96: 1x1, 512 filters.
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
# Merge [-1 -(3k+4)]
# 97: combines layer 96 (-1) with layer 81 (-16), the first branch.
# With k=4 residual blocks, 3k+4 = 16.
[route]
layers = -1,-16
# Transition last
# 98 (previous+7+3k)
[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=swish
# ============ End of Backbone ============ #
# ============ Neck ============ #
# CSPSPP
# Layout (layers 99-113): two 1x1 branches taken from layer 98; the second
# branch runs three convs, three stride-1 max-pools of size 5, 9 and 13 over
# the same input, and two more convs before both branches are merged.
# Layer 113, the block output, is referenced by absolute index from PAN-5.
# 99: first branch, 1x1, 512 filters (picked up again by the merge at layer 112).
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
# 100: -2 points back at layer 98, the last backbone layer.
[route]
layers = -2
# 101-103: second branch, 1x1 -> 3x3 -> 1x1, 512 filters each.
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=swish
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
### SPP ###
# 104: max-pool size 5 over layer 103.
[maxpool]
stride=1
size=5
# 105: back to layer 103.
[route]
layers=-2
# 106: max-pool size 9 over layer 103.
[maxpool]
stride=1
size=9
# 107: back to layer 103.
[route]
layers=-4
# 108: max-pool size 13 over layer 103.
[maxpool]
stride=1
size=13
# 109: combines layers 108, 106, 104 (the three pools) and 103 (their input).
[route]
layers=-1,-3,-5,-6
### End SPP ###
# 110-111: 1x1 then 3x3, 512 filters each.
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=swish
# 112: combines layer 111 (-1) with layer 99 (-13), the first branch.
[route]
layers = -1, -13
# 113 (previous+6+5+2k)
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
# End of CSPSPP
# FPN-4
# Layout (layers 114-127): the layer-113 output is reduced to 256 filters and
# upsampled by 2, combined with a 1x1 projection of backbone layer 79, then
# passed through a split / plain block / merge.
# Layer 127, the block output, is referenced by absolute index from PAN-4.
# 114: 1x1, 256 filters.
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# 115
[upsample]
stride=2
# 116: absolute index 79 is the P4 stage output of the backbone.
[route]
layers = 79
# 117: 1x1, 256 filters, applied to the layer-79 features.
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# 118: combines layer 117 (-1) with layer 115 (-3), the upsampled features.
[route]
layers = -1, -3
# 119: 1x1, 256 filters.
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# Split
# 120: first branch, 1x1, 256 filters (picked up again by the merge at layer 126).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# 121: -2 points back at layer 119, so the second branch starts there too.
[route]
layers = -2
# Plain Block
# 122-125: 1x1 -> 3x3 -> 1x1 -> 3x3, 256 filters each, with no shortcut.
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=swish
# Merge [-1, -(2k+2)]
# 126: combines layer 125 (-1) with layer 120 (-6), the first branch.
[route]
layers = -1, -6
# Transition last
# 127 (previous+6+4+2k)
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# FPN-3
# Layout (layers 128-141): the layer-127 output is reduced to 128 filters and
# upsampled by 2, combined with a 1x1 projection of backbone layer 48, then
# passed through a split / plain block / merge.
# Layer 141, the block output, is referenced by absolute index from the
# YOLO-3 head.
# 128: 1x1, 128 filters.
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# 129
[upsample]
stride=2
# 130: absolute index 48 is the P3 stage output of the backbone.
[route]
layers = 48
# 131: 1x1, 128 filters, applied to the layer-48 features.
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# 132: combines layer 131 (-1) with layer 129 (-3), the upsampled features.
[route]
layers = -1, -3
# 133: 1x1, 128 filters.
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# Split
# 134: first branch, 1x1, 128 filters (picked up again by the merge at layer 140).
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# 135: -2 points back at layer 133, so the second branch starts there too.
[route]
layers = -2
# Plain Block
# 136-139: 1x1 -> 3x3 -> 1x1 -> 3x3, 128 filters each, with no shortcut.
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=swish
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=swish
# Merge [-1, -(2k+2)]
# 140: combines layer 139 (-1) with layer 134 (-6), the first branch.
[route]
layers = -1, -6
# Transition last
# 141 (previous+6+4+2k)
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=swish
# PAN-4
# Layout (layers 142-152): the layer-141 output is downsampled with a stride-2
# conv, combined with layer 127, then passed through a split / plain block /
# merge.
# Layer 152, the block output, is referenced by absolute index from the
# YOLO-4 head.
# 142: 3x3, stride 2, 256 filters.
[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=256
activation=swish
# 143: combines layer 142 (-1) with absolute layer 127, the FPN-4 output.
[route]
layers = -1, 127
# 144: 1x1, 256 filters.
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# Split
# 145: first branch, 1x1, 256 filters (picked up again by the merge at layer 151).
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# 146: -2 points back at layer 144, so the second branch starts there too.
[route]
layers = -2
# Plain Block
# 147-150: 1x1 -> 3x3 -> 1x1 -> 3x3, 256 filters each, with no shortcut.
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=swish
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=swish
# 151: combines layer 150 (-1) with layer 145 (-6), the first branch.
[route]
layers = -1,-6
# Transition last
# 152 (previous+3+4+2k)
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=swish
# PAN-5
# Layout (layers 153-163): the layer-152 output is downsampled with a stride-2
# conv, combined with layer 113, then passed through a split / plain block /
# merge.
# Layer 163, the block output, is referenced by absolute index from the
# YOLO-5 head.
# 153: 3x3, stride 2, 512 filters.
[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=512
activation=swish
# 154: combines layer 153 (-1) with absolute layer 113, the CSPSPP output.
[route]
layers = -1, 113
# 155: 1x1, 512 filters.
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
# Split
# 156: first branch, 1x1, 512 filters (picked up again by the merge at layer 162).
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
# 157: -2 points back at layer 155, so the second branch starts there too.
[route]
layers = -2
# Plain Block
# 158-161: 1x1 -> 3x3 -> 1x1 -> 3x3, 512 filters each, with no shortcut.
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=swish
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=swish
# 162: combines layer 161 (-1) with layer 156 (-6), the first branch.
[route]
layers = -1,-6
# Transition last
# 163 (previous+3+4+2k)
# NOTE(review): stopbackward=900 presumably stops back-propagation at this
# layer for the first 900 iterations, leaving earlier layers untrained during
# that time - confirm against the Darknet documentation/source.
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=swish
stopbackward=900
# ============ End of Neck ============ #
# ============ Head ============ #
# YOLO-3
# Detection head fed from layer 141 (layers 164-167).
# 164: absolute index 141 is the FPN-3 output.
[route]
layers = 141
# 165: 3x3, 256 filters.
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=swish
# 166: 1x1 output conv without batch_normalize.
# NOTE(review): filters=24 presumably equals 3 masks * (classes + 5) with
# classes=3; if classes changes this value has to change too - confirm
# against the Darknet documentation.
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=logistic
# 167: this head uses anchor indices 0,1,2 out of the num=9 anchor pairs.
# The anchors line is identical in all three [yolo] sections.
[yolo]
mask = 0,1,2
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=3
num=9
jitter=.1
scale_x_y = 2.0
objectness_smooth=1
ignore_thresh = .7
truth_thresh = 1
#random=1
resize=1.5
#iou_thresh=0.4
iou_normalizer=0.05
cls_normalizer=0.5
obj_normalizer=0.4
iou_loss=ciou
nms_kind=diounms
beta_nms=0.6
new_coords=1
max_delta=2
# YOLO-4
# Detection head fed from layer 152 (layers 168-171).
# 168: absolute index 152 is the PAN-4 output.
[route]
layers = 152
# 169: 3x3, 512 filters.
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=swish
# 170: 1x1 output conv without batch_normalize.
# NOTE(review): filters=24 presumably equals 3 masks * (classes + 5) with
# classes=3; if classes changes this value has to change too - confirm
# against the Darknet documentation.
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=logistic
# 171: this head uses anchor indices 3,4,5 out of the num=9 anchor pairs.
# The anchors line is identical in all three [yolo] sections.
[yolo]
mask = 3,4,5
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=3
num=9
jitter=.1
scale_x_y = 2.0
objectness_smooth=1
ignore_thresh = .7
truth_thresh = 1
#random=1
resize=1.5
#iou_thresh=0.4
iou_normalizer=0.05
cls_normalizer=0.5
obj_normalizer=0.4
iou_loss=ciou
nms_kind=diounms
beta_nms=0.6
new_coords=1
max_delta=2
# YOLO-5
# Detection head fed from layer 163 (layers 172-175).
# 172: absolute index 163 is the PAN-5 output.
[route]
layers = 163
# 173: 3x3, 1024 filters.
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=swish
# 174: 1x1 output conv without batch_normalize.
# NOTE(review): filters=24 presumably equals 3 masks * (classes + 5) with
# classes=3; if classes changes this value has to change too - confirm
# against the Darknet documentation.
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=logistic
# 175: this head uses anchor indices 6,7,8 out of the num=9 anchor pairs.
# The anchors line is identical in all three [yolo] sections.
[yolo]
mask = 6,7,8
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=3
num=9
jitter=.1
scale_x_y = 2.0
objectness_smooth=1
ignore_thresh = .7
truth_thresh = 1
#random=1
resize=1.5
#iou_thresh=0.4
iou_normalizer=0.05
cls_normalizer=0.5
obj_normalizer=0.4
iou_loss=ciou
nms_kind=diounms
beta_nms=0.6
new_coords=1
max_delta=2
If I understand correctly what YOLO is doing here, the effective batch size (minibatch) is 2 (batch size divided by subdivisions). This is a very small batch, so you'll need to train for a long time to get through the entire dataset. I'd try decreasing the number of subdivisions until the minibatch can no longer fit on your GPU.
This is already the smallest subdivision
Try decreasing the width and height to 416, and resize the training images before training. Make sure you compiled Darknet with CUDA enabled.
Darknet training speed feels much slower than yolov5. Yolov5 may take 2 hours, while darknet needs one night.