WongKinYiu / ScaledYOLOv4

Scaled-YOLOv4: Scaling Cross Stage Partial Network

Does YOLOv4-csp need greater resolution than YOLOv4? #105

Status: Open. Linchunhui opened this issue 3 years ago

Linchunhui commented 3 years ago

I trained YOLOv4-csp at 384x384 resolution, but the loss is still greater than 16 after 60000 iterations, while I get good results with YOLOv4 at 384x384. I updated to the latest version of darknet, and the cfg and pretrained weights are the latest yolov4-csp.cfg and yolov4-csp.conv.142. This is my training cfg:

[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=16
width=384   
height=384
channels=3
momentum=0.949
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.0001
burn_in=1000
max_batches = 500500
policy=steps
steps=400000,450000
scales=.1,.1

mosaic=0

letter_box=1

ema_alpha=0.9998

optimized_memory=0
try_fix_nan=1
#23:104x104 54:52x52 85:26x26 104:13x13 for 416

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=relu

# Downsample

[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=relu

#[convolutional]
#batch_normalize=1
#filters=64
#size=1
#stride=1
#pad=1
#activation=relu

#[route]
#layers = -2

#[convolutional]
#batch_normalize=1
#filters=64
#size=1
#stride=1
#pad=1
#activation=relu

[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

#[convolutional]
#batch_normalize=1
#filters=64
#size=1
#stride=1
#pad=1
#activation=relu

#[route]
#layers = -1,-7

#[convolutional]
#batch_normalize=1
#filters=64
#size=1
#stride=1
#pad=1
#activation=relu

# Downsample

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=relu

[route]
layers = -1,-10

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

# Downsample

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[route]
layers = -1,-28

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

# Downsample

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[route]
layers = -1,-28

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

# Downsample

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=relu

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[route]
layers = -1,-16

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=relu

##########################

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

### SPP ###
[maxpool]
stride=1
size=5

[route]
layers=-2

[maxpool]
stride=1
size=9

[route]
layers=-4

[maxpool]
stride=1
size=13

[route]
layers=-1,-3,-5,-6
### End SPP ###

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=relu

[route]
layers = -1, -13

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[upsample]
stride=2

[route]
layers = 79

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[route]
layers = -1, -3

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=relu

[route]
layers = -1, -6

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[upsample]
stride=2

[route]
layers = 48

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[route]
layers = -1, -3

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=relu

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=128
activation=relu

[route]
layers = -1, -6

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=relu

##########################

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=relu

[convolutional]
size=1
stride=1
pad=1
filters=27
activation=logistic

[yolo]
mask = 0,1,2
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=4
num=9
jitter=.1
scale_x_y = 2.0
objectness_smooth=0
ignore_thresh = .7
truth_thresh = 1
#random=1
resize=1.5
iou_thresh=0.2
iou_normalizer=0.05
cls_normalizer=0.5
obj_normalizer=4.0
iou_loss=ciou
nms_kind=diounms
beta_nms=0.6
new_coords=1
max_delta=20

[route]
layers = -4

[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=256
activation=relu

[route]
layers = -1, -20

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=relu

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=relu

[route]
layers = -1,-6

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=relu

[convolutional]
size=1
stride=1
pad=1
filters=27
activation=logistic

[yolo]
mask = 3,4,5
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=4
num=9
jitter=.1
scale_x_y = 2.0
objectness_smooth=1
ignore_thresh = .7
truth_thresh = 1
#random=1
resize=1.5
iou_thresh=0.2
iou_normalizer=0.05
cls_normalizer=0.5
obj_normalizer=1.0
iou_loss=ciou
nms_kind=diounms
beta_nms=0.6
new_coords=1
max_delta=5

[route]
layers = -4

[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=512
activation=relu

[route]
layers = -1, -49

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=relu

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=relu

[route]
layers = -1,-6

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=relu

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=relu

[convolutional]
size=1
stride=1
pad=1
filters=27
activation=logistic

[yolo]
mask = 6,7,8
anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401
classes=4
num=9
jitter=.1
scale_x_y = 2.0
objectness_smooth=1
ignore_thresh = .7
truth_thresh = 1
#random=1
resize=1.5
iou_thresh=0.2
iou_normalizer=0.05
cls_normalizer=0.5
obj_normalizer=0.4
iou_loss=ciou
nms_kind=diounms
beta_nms=0.6
new_coords=1
max_delta=2

and the training chart: [image: loss chart]
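For a chart that also tracks accuracy, darknet can overlay mAP on the loss curve when training is launched with the -map flag. A minimal sketch, assuming an obj.data file that points at hypothetical train/valid lists:

    # resume the same run, periodically scoring mAP on the valid= set
    # and drawing it on chart.png alongside the loss
    ./darknet detector train obj.data yolov4-csp.cfg yolov4-csp.conv.142 -map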

WongKinYiu commented 3 years ago

See the value of mAP instead of the value of loss.
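A checkpoint can also be scored directly with the map subcommand; a sketch using the same hypothetical obj.data and the last autosaved weights:

    # report mAP@0.5 (and per-class AP) for a trained checkpoint
    ./darknet detector map obj.data yolov4-csp.cfg backup/yolov4-csp_last.weights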

Linchunhui commented 3 years ago

mAP is 80%, while YOLOv4 is more than 90% at the same number of iterations. Does it just need more training iterations?

yutao007 commented 3 years ago

You set learning_rate=0.0001; why isn't your learning_rate 0.001?
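Trying that suggestion only requires editing the [net] block; a sketch of the change (whether 0.001 suits this dataset is an assumption):

    # suggested value (the posted cfg uses 0.0001)
    learning_rate=0.001
    # unchanged: darknet ramps the LR from 0 up to learning_rate over burn_in iterations
    burn_in=1000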