Open MrCuiHao opened 4 years ago
Set max_batches=6000
and train from scratch.
If it doesn't help - try to train with this pre-trained file: https://github.com/WongKinYiu/CrossStagePartialNetworks/blob/master/weight/enetb0_final.weights
Thank you very much, @AlexeyAB. I have a few more questions: 1. I used the following command to extract the pre-trained weights from the file downloaded from the link you gave. Is it right? I also don't know what number to put at the end of the command: 134, 135, or something else? ..\..\darknet.exe partial enetb0.cfg pretrained-convolutional-weights\enetb0_final.weights pretrained-convolutional-weights\enetb0.conv.135 135
2. Why does the console print the TOP5 value rather than TOP1? Is there a problem with the train command, or is something else wrong?
3. I have now added more Fire-Smoke images so that the two classes have the same number of images, and I also used the pre-trained weights file enetb0.conv.135 extracted from enetb0_final.weights. The current training result is shown below; I hope the result can be improved further:
Hello AlexeyAB: 1. I want to do image classification with darknet: I am going to classify whole images into two classes, Fire-Smoke and non-Fire-Smoke. 2. I prepared 1312 Fire-Smoke images and 2688 non-Fire-Smoke images. 3. imagenet.data: classes=2 train = train.list valid = valid.list backup = backup/ labels = labels.list names = shortnames.list top=1 4. efficientnet_b0.cfg: [net]
# Training
batch=120 subdivisions=12
# Testing
#batch=1
#subdivisions=1
height=224 width=224 channels=3 momentum=0.9 decay=0.0005 max_crop=256
#mixup=4
blur=1 cutmix=1 mosaic=1
burn_in=1000
#burn_in=100
learning_rate=0.256 policy=poly power=4 max_batches=1600 momentum=0.9 decay=0.00005
angle=7 hue=.1 saturation=.75 exposure=.75 aspect=.75
CONV1 - 1 (1)
conv1
[convolutional] filters=32 size=3 pad=1 stride=2 batch_normalize=1 activation=swish
CONV2 - MBConv1 - 1 (1)
conv2_1_expand
[convolutional] filters=32 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv2_1_dwise
[convolutional] groups=32 filters=32 size=3 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=4 (recommended r=16)
[convolutional] filters=8 size=1 stride=1 activation=swish
excitation
[convolutional] filters=32 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv2_1_linear
[convolutional] filters=16 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV3 - MBConv6 - 1 (2)
conv2_2_expand
[convolutional] filters=96 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv2_2_dwise
[convolutional] groups=96 filters=96 size=3 pad=1 stride=2 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=8 (recommended r=16)
[convolutional] filters=16 size=1 stride=1 activation=swish
excitation
[convolutional] filters=96 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv2_2_linear
[convolutional] filters=24 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV3 - MBConv6 - 2 (2)
conv3_1_expand
[convolutional] filters=144 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv3_1_dwise
[convolutional] groups=144 filters=144 size=3 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=8 size=1 stride=1 activation=swish
excitation
[convolutional] filters=144 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv3_1_linear
[convolutional] filters=24 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV4 - MBConv6 - 1 (2)
dropout only before residual connection
[dropout] probability=.2
block_3_1
[shortcut] from=-9 activation=linear
conv_3_2_expand
[convolutional] filters=144 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_3_2_dwise
[convolutional] groups=144 filters=144 size=5 pad=1 stride=2 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=8 size=1 stride=1 activation=swish
excitation
[convolutional] filters=144 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_3_2_linear
[convolutional] filters=40 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV4 - MBConv6 - 2 (2)
conv_4_1_expand
[convolutional] filters=192 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_4_1_dwise
[convolutional] groups=192 filters=192 size=5 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=16 size=1 stride=1 activation=swish
excitation
[convolutional] filters=192 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_4_1_linear
[convolutional] filters=40 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV5 - MBConv6 - 1 (3)
dropout only before residual connection
[dropout] probability=.2
block_4_2
[shortcut] from=-9 activation=linear
conv_4_3_expand
[convolutional] filters=192 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_4_3_dwise
[convolutional] groups=192 filters=192 size=3 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=16 size=1 stride=1 activation=swish
excitation
[convolutional] filters=192 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_4_3_linear
[convolutional] filters=80 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV5 - MBConv6 - 2 (3)
conv_4_4_expand
[convolutional] filters=384 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_4_4_dwise
[convolutional] groups=384 filters=384 size=3 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=24 size=1 stride=1 activation=swish
excitation
[convolutional] filters=384 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_4_4_linear
[convolutional] filters=80 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV5 - MBConv6 - 3 (3)
dropout only before residual connection
[dropout] probability=.2
block_4_4
[shortcut] from=-9 activation=linear
conv_4_5_expand
[convolutional] filters=384 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_4_5_dwise
[convolutional] groups=384 filters=384 size=3 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=24 size=1 stride=1 activation=swish
excitation
[convolutional] filters=384 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_4_5_linear
[convolutional] filters=80 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV6 - MBConv6 - 1 (3)
dropout only before residual connection
[dropout] probability=.2
block_4_6
[shortcut] from=-9 activation=linear
conv_4_7_expand
[convolutional] filters=384 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_4_7_dwise
[convolutional] groups=384 filters=384 size=5 pad=1 stride=2 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=24 size=1 stride=1 activation=swish
excitation
[convolutional] filters=384 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_4_7_linear
[convolutional] filters=112 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV6 - MBConv6 - 2 (3)
conv_5_1_expand
[convolutional] filters=576 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_5_1_dwise
[convolutional] groups=576 filters=576 size=5 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=32 size=1 stride=1 activation=swish
excitation
[convolutional] filters=576 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_5_1_linear
[convolutional] filters=112 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV6 - MBConv6 - 3 (3)
dropout only before residual connection
[dropout] probability=.2
block_5_1
[shortcut] from=-9 activation=linear
conv_5_2_expand
[convolutional] filters=576 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_5_2_dwise
[convolutional] groups=576 filters=576 size=5 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=32 size=1 stride=1 activation=swish
excitation
[convolutional] filters=576 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_5_2_linear
[convolutional] filters=112 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV7 - MBConv6 - 1 (4)
dropout only before residual connection
[dropout] probability=.2
block_5_2
[shortcut] from=-9 activation=linear
conv_5_3_expand
[convolutional] filters=576 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_5_3_dwise
[convolutional] groups=576 filters=576 size=5 pad=1 stride=2 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=32 size=1 stride=1 activation=swish
excitation
[convolutional] filters=576 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_5_3_linear
[convolutional] filters=192 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV7 - MBConv6 - 2 (4)
conv_6_1_expand
[convolutional] filters=960 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_6_1_dwise
[convolutional] groups=960 filters=960 size=5 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=64 size=1 stride=1 activation=swish
excitation
[convolutional] filters=960 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_6_1_linear
[convolutional] filters=192 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV7 - MBConv6 - 3 (4)
dropout only before residual connection
[dropout] probability=.2
block_6_1
[shortcut] from=-9 activation=linear
conv_6_2_expand
[convolutional] filters=960 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_6_2_dwise
[convolutional] groups=960 filters=960 size=5 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=64 size=1 stride=1 activation=swish
excitation
[convolutional] filters=960 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_6_2_linear
[convolutional] filters=192 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV7 - MBConv6 - 4 (4)
dropout only before residual connection
[dropout] probability=.2
block_6_1
[shortcut] from=-9 activation=linear
conv_6_2_expand
[convolutional] filters=960 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_6_2_dwise
[convolutional] groups=960 filters=960 size=5 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=64 size=1 stride=1 activation=swish
excitation
[convolutional] filters=960 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_6_2_linear
[convolutional] filters=192 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV8 - MBConv6 - 1 (1)
dropout only before residual connection
[dropout] probability=.2
block_6_2
[shortcut] from=-9 activation=linear
conv_6_3_expand
[convolutional] filters=960 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
conv_6_3_dwise
[convolutional] groups=960 filters=960 size=3 stride=1 pad=1 batch_normalize=1 activation=swish
squeeze-n-excitation
[avgpool]
squeeze ratio r=16 (recommended r=16)
[convolutional] filters=64 size=1 stride=1 activation=swish
excitation
[convolutional] filters=960 size=1 stride=1 activation=logistic
multiply channels
[scale_channels] from=-4
conv_6_3_linear
[convolutional] filters=320 size=1 stride=1 pad=0 batch_normalize=1 activation=linear
CONV9 - Conv2d 1x1
conv_6_4
[convolutional] filters=1280 size=1 stride=1 pad=0 batch_normalize=1 activation=swish
[avgpool]
[dropout] probability=.2
[convolutional] filters=2 size=1 stride=1 pad=0 activation=linear
[softmax] groups=1
[cost]
type=sse
5. Train command: ..\..\darknet.exe classifier train imagenet.data efficientnet_b0.cfg -topk
6、Result: