Open mozpp opened 5 years ago
@heavengate @wanghaoshuang
我的做法(目前有 bug)如下。修改后的 train.py:
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
def set_paddle_flags(flags):
    """Export default runtime flags through the process environment.

    Every ``name -> default`` pair in ``flags`` is written to ``os.environ``
    (converted with ``str``) unless a variable with that name is already
    set, so values already present in the environment are left untouched.

    Args:
        flags: mapping of environment variable name to its default value.
    """
    for name, default in flags.items():
        if name not in os.environ:
            os.environ[name] = str(default)
# Default memory-related flags. This call sits ahead of the `import paddle`
# statements further down -- presumably so the flags are already in the
# environment when paddle initializes (TODO confirm).
set_paddle_flags(dict(
    FLAGS_eager_delete_tensor_gb=0,  # enable gc
    FLAGS_memory_fraction_of_eager_deletion=1,
    FLAGS_fraction_of_gpu_memory_to_use=0.8,
))
import sys
import numpy as np
import random
import time
import shutil
from utility import parse_args, print_arguments, SmoothedValue
import paddle
import paddle.fluid as fluid
import reader
from models.yolov3 import YOLOv3
from learning_rate import exponential_with_warmup_decay
from config import cfg
from paddle.fluid.contrib.slim import Compressor
def build_program(main_prog, startup_prog, is_train):
    """Populate ``main_prog`` with either the training or the evaluation graph.

    All layers are created under ``fluid.program_guard(main_prog,
    startup_prog)``. The training graph is built inside the unique-name
    scope "train", the evaluation graph inside "inference".

    Args:
        main_prog: program that receives the model ops.
        startup_prog: program that receives the initialization ops.
        is_train: build the training graph when truthy, otherwise the
            evaluation graph.

    Returns:
        ``[py_reader, loss]`` for training, or
        ``[py_reader, map, outputs]`` for evaluation, where ``map`` is the
        result of ``model.get_map()`` and ``outputs`` that of
        ``model.get_pred()``.
    """
    with fluid.program_guard(main_prog, startup_prog):
        if not is_train:
            with fluid.unique_name.guard("inference"):
                eval_model = YOLOv3(is_train=False, is_val=True)
                eval_model.build_model()
                eval_reader = eval_model.py_reader
                # get_map() is called before get_pred(), as in the original.
                map_out = eval_model.get_map()
                pred_out = eval_model.get_pred()
                return [eval_reader, map_out, pred_out]

        with fluid.unique_name.guard("train"):
            train_model = YOLOv3()
            train_model.build_model()
            train_reader = train_model.py_reader

            total_loss = train_model.loss()
            total_loss.persistable = True

            # Piecewise schedule: one value per interval delimited by
            # cfg.lr_steps, each scaled by a further factor of lr_gamma.
            base_lr = cfg.learning_rate
            lr_steps = cfg.lr_steps
            decay = cfg.lr_gamma
            lr_values = [
                base_lr * (decay ** k) for k in range(len(cfg.lr_steps) + 1)
            ]
            lr_schedule = exponential_with_warmup_decay(
                learning_rate=base_lr,
                boundaries=lr_steps,
                values=lr_values,
                warmup_iter=cfg.warm_up_iter,
                warmup_factor=cfg.warm_up_factor)

            sgd = fluid.optimizer.Momentum(
                learning_rate=lr_schedule,
                regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
                momentum=cfg.momentum)
            sgd.minimize(total_loss)
            return [train_reader, total_loss]
def train():
    """Build the YOLOv3 train/eval programs, run the slim Compressor, export the result.

    Steps, in order:
      1. Optionally fix the random seeds (``cfg.debug``) and create
         ``cfg.model_save_dir``.
      2. Build the training and evaluation programs on a shared startup
         program and run that startup program.
      3. Optionally load pretrained variables from ``cfg.pretrain``.
      4. Attach the test/train readers to the py_readers.
      5. Run ``Compressor`` configured from ``./compress/compress.yaml``.
      6. Save the compressor's eval program as an inference model under
         ``./compress/pruned_model/`` and print every parameter shape.
    """
    if cfg.debug:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    save_dir = cfg.model_save_dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    input_size = cfg.input_size

    # An unset/empty CUDA_VISIBLE_DEVICES still counts as one device.
    visible = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = visible.count(",") + 1
    print("Found {} CUDA devices.".format(devices_num))

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    train_py_reader, loss = build_program(train_prog, startup_prog, True)
    test_py_reader, map_var, outputs = build_program(
        test_prog, startup_prog, False)
    test_prog = test_prog.clone(for_test=True)

    for p in train_prog.global_block().all_parameters():
        if 'conv' in p.name:
            print(p.name, p.shape)

    if cfg.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            # Only reported; loading below then finds no matching files.
            print("Pretrain weights not found: {}".format(cfg.pretrain))

        def if_exist(var):
            """Select the variables that have a weight file in cfg.pretrain."""
            on_disk = os.path.exists(os.path.join(cfg.pretrain, var.name))
            if cfg.class_num == 80:
                return on_disk
            # class_num differs from 80, so the output-layer weights are
            # not loaded.
            return on_disk and 'yolo_output' not in var.name

        fluid.io.load_vars(
            exe, cfg.pretrain, main_program=train_prog, predicate=if_exist)

    test_reader = reader.test(input_size, batch_size=cfg.batch_size)
    test_py_reader.decorate_paddle_reader(test_reader)

    # Multiples of 32 from 320 to 608 when random shapes are enabled.
    random_sizes = (
        list(range(320, 640, 32)) if cfg.random_shape else [cfg.input_size])

    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter

    train_reader = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=True,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocessing=cfg.use_multiprocess)
    train_py_reader.decorate_paddle_reader(train_reader)

    train_fetch_list = [("loss", loss.name)]
    val_fetch_list = [("map", map_var.name)]

    compressor = Compressor(
        place,
        fluid.global_scope(),
        train_prog,
        train_reader=train_py_reader,
        train_feed_list=None,
        train_fetch_list=train_fetch_list,
        eval_program=test_prog,
        eval_reader=test_py_reader,
        eval_feed_list=None,
        eval_fetch_list=val_fetch_list,
        train_optimizer=None)
    compressor.config('./compress/compress.yaml')
    compressor.run()

    pruned_prog = compressor.eval_graph.program

    # NOTE(review): the evaluation model is built with is_val=True, whose
    # image comes from a py_reader rather than a data layer named 'image'.
    # Confirm that both feed names below exist in the pruned program.
    fluid.io.save_inference_model(
        "./compress/pruned_model/",
        feeded_var_names=['image', 'im_shape'],
        target_vars=[outputs],
        executor=exe,
        main_program=pruned_prog)

    # check the shape of parameters
    for p in pruned_prog.global_block().all_parameters():
        print("name: {}; shape: {}".format(p.name, p.shape))
if __name__ == "__main__":
    # Script entry point: obtain the arguments from utility.parse_args,
    # echo them with utility.print_arguments, then start train().
    cli_args = parse_args()
    print_arguments(cli_args)
    train()
修改后的 yolov3.py:
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
from config import cfg
from .darknet_ours import add_DarkNet53_conv_body
from .darknet_ours import conv_bn_layer
# from .darknet import add_DarkNet53_conv_body
# from .darknet import conv_bn_layer
def yolo_detection_block(input, channel, is_test=True, name=None):
    """Stack the conv_bn layers of one YOLO detection block.

    Two rounds of (1x1 conv with ``channel`` filters, 3x3 conv with
    ``channel * 2`` filters) are followed by a 1x1 ``route`` conv and a
    3x3 ``tip`` conv applied to the route.

    Args:
        input: input variable of the block.
        channel: filter count of the 1x1 convs; must be even.
        is_test: forwarded to every ``conv_bn_layer`` call.
        name: prefix used to derive each layer name.

    Returns:
        Tuple ``(route, tip)``.

    Raises:
        AssertionError: if ``channel`` is odd.
    """
    assert channel % 2 == 0, \
        "channel {} cannot be divided by 2".format(channel)

    def _pointwise(x, layer_name):
        # 1x1 conv keeping `channel` filters.
        return conv_bn_layer(x, channel, filter_size=1, stride=1,
                             padding=0, is_test=is_test, name=layer_name)

    def _expand(x, layer_name):
        # 3x3 conv doubling the filter count.
        return conv_bn_layer(x, channel * 2, filter_size=3, stride=1,
                             padding=1, is_test=is_test, name=layer_name)

    feat = input
    for idx in (0, 1):
        feat = _pointwise(feat, '{}.{}.0'.format(name, idx))
        feat = _expand(feat, '{}.{}.1'.format(name, idx))

    route = _pointwise(feat, '{}.2'.format(name))
    tip = _expand(route, '{}.tip'.format(name))
    return route, tip
def upsample(input, scale=2, name=None):
    """Nearest-neighbour upsample of ``input`` by ``scale``.

    The target size is computed at run time: entries 2 and 3 of the
    input's shape tensor (presumably H and W of an NCHW tensor -- TODO
    confirm) are multiplied by ``scale`` and handed to ``resize_nearest``
    as ``actual_shape``.

    Args:
        input: variable to upsample.
        scale: multiplier applied to the two sliced dimensions.
        name: name forwarded to ``resize_nearest``.

    Returns:
        The output of ``fluid.layers.resize_nearest``.
    """
    # Runtime shape of the input; only entries [2:4] are kept.
    full_shape = fluid.layers.shape(input)
    spatial = fluid.layers.slice(full_shape, axes=[0], starts=[2], ends=[4])
    spatial.stop_gradient = True

    target_hw = fluid.layers.cast(spatial, dtype='int32') * scale
    target_hw.stop_gradient = True

    # Resize by actual_shape.
    return fluid.layers.resize_nearest(
        input=input,
        scale=scale,
        actual_shape=target_hw,
        name=name)
class YOLOv3(object):
    """YOLOv3 graph builder with train, validation and inference input modes.

    The mode is selected by the constructor flags:

    * ``is_train=True``: inputs come from a py_reader and ``build_model``
      appends one ``yolov3_loss`` per output head.
    * ``is_train=False, is_val=True``: inputs come from a py_reader that
      also yields ground truth, plus ``im_shape``/``im_id`` data layers;
      ``build_model`` appends ``yolo_box`` decoders.
    * otherwise: ``image``/``im_shape``/``im_id`` data layers and
      ``yolo_box`` decoders.

    NOTE(review): ``build_model`` halves ``self.downsample`` once per head
    and appends to ``self.losses`` without resetting either, so it looks
    intended to be called only once per instance -- confirm.
    """

    def __init__(self,
                 is_train=True,
                 use_random=True,
                 is_val=False):
        self.is_train = is_train
        self.use_random = use_random
        self.is_val = is_val  # add val
        self.outputs = []
        self.losses = []
        # Downsample ratio used for the first output head.
        self.downsample = 32

    def _build_gt_reader(self):
        """Create the py_reader yielding image, gt box, gt label and gt score."""
        self.py_reader = fluid.layers.py_reader(
            capacity=64,
            shapes=[[-1] + self.image_shape,
                    [-1, cfg.max_box_num, 4],
                    [-1, cfg.max_box_num],
                    [-1, cfg.max_box_num]],
            lod_levels=[0, 0, 0, 0],
            dtypes=['float32'] * 2 + ['int32'] + ['float32'],
            use_double_buffer=True)
        self.image, self.gtbox, self.gtlabel, self.gtscore = \
            fluid.layers.read_file(self.py_reader)

    def _build_shape_and_id_inputs(self):
        """Create the ``im_shape`` and ``im_id`` data layers."""
        self.im_shape = fluid.layers.data(
            name="im_shape", shape=[2], dtype='int32')
        self.im_id = fluid.layers.data(
            name="im_id", shape=[1], dtype='int32')

    def build_input(self):
        """Create the input variables for the configured mode."""
        self.image_shape = [3, cfg.input_size, cfg.input_size]
        if self.is_train:
            self._build_gt_reader()
        elif self.is_val:  # add val
            self._build_gt_reader()
            self._build_shape_and_id_inputs()
        else:
            self.image = fluid.layers.data(
                name='image', shape=self.image_shape, dtype='float32'
            )
            self._build_shape_and_id_inputs()

    def feeds(self):
        """Return the input variables: ground truth in train mode, else image/id/shape."""
        if not self.is_train:
            return [self.image, self.im_id, self.im_shape]
        return [self.image, self.gtbox, self.gtlabel, self.gtscore]

    def feeds_0id(self):
        """Same as ``feeds`` but without ``im_id`` outside train mode."""
        if not self.is_train:
            return [self.image, self.im_shape]
        return [self.image, self.gtbox, self.gtlabel, self.gtscore]

    def _append_yolo_box(self, out, anchor_mask, index):
        """Decode one head with ``yolo_box`` and record its boxes and scores."""
        # Flatten the (w, h) anchor pairs selected by this head's mask.
        mask_anchors = [
            cfg.anchors[2 * m + k] for m in anchor_mask for k in (0, 1)
        ]
        boxes, scores = fluid.layers.yolo_box(
            x=out,
            img_size=self.im_shape,
            anchors=mask_anchors,
            class_num=cfg.class_num,
            conf_thresh=cfg.valid_thresh,
            downsample_ratio=self.downsample,
            name="yolo_box" + str(index))
        self.boxes.append(boxes)
        self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))

    def build_model(self):
        """Build inputs, backbone, detection heads and per-head loss/decoders."""
        self.build_input()

        self.outputs = []
        self.boxes = []
        self.scores = []

        blocks = add_DarkNet53_conv_body(self.image, not self.is_train)
        for i, block in enumerate(blocks):
            if i > 0:
                # Merge the upsampled route of the previous head.
                block = fluid.layers.concat(
                    input=[route, block],
                    axis=1)
            route, tip = yolo_detection_block(block, channel=512 // (2 ** i),
                                              is_test=(not self.is_train),
                                              name="yolo_block.{}".format(i))

            # out channel number = mask_num * (5 + class_num)
            num_filters = len(cfg.anchor_masks[i]) * (cfg.class_num + 5)
            block_out = fluid.layers.conv2d(
                input=tip,
                num_filters=num_filters,
                filter_size=1,
                stride=1,
                padding=0,
                act=None,
                param_attr=ParamAttr(
                    initializer=fluid.initializer.Normal(0., 0.02),
                    name="yolo_output.{}.conv.weights".format(i)),
                bias_attr=ParamAttr(
                    initializer=fluid.initializer.Constant(0.0),
                    regularizer=L2Decay(0.),
                    name="yolo_output.{}.conv.bias".format(i)))
            self.outputs.append(block_out)

            if i < len(blocks) - 1:
                route = conv_bn_layer(
                    input=route,
                    ch_out=256 // (2 ** i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=(not self.is_train),
                    name="yolo_transition.{}".format(i))
                # upsample
                route = upsample(route)

        for i, out in enumerate(self.outputs):
            anchor_mask = cfg.anchor_masks[i]
            if self.is_train:
                loss = fluid.layers.yolov3_loss(
                    x=out,
                    gt_box=self.gtbox,
                    gt_label=self.gtlabel,
                    gt_score=self.gtscore,
                    anchors=cfg.anchors,
                    anchor_mask=anchor_mask,
                    class_num=cfg.class_num,
                    ignore_thresh=cfg.ignore_thresh,
                    downsample_ratio=self.downsample,
                    use_label_smooth=cfg.label_smooth,
                    name="yolo_loss" + str(i))
                self.losses.append(fluid.layers.reduce_mean(loss))
            else:
                # Validation and plain inference decode boxes identically.
                self._append_yolo_box(out, anchor_mask, i)

            # Each following head uses half the downsample ratio.
            self.downsample //= 2

    def loss(self):
        """Return the sum of the per-head losses collected by ``build_model``."""
        return sum(self.losses)

    def _multiclass_nms(self):
        """Concatenate the per-head boxes/scores and apply ``multiclass_nms``."""
        yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
        yolo_scores = fluid.layers.concat(self.scores, axis=2)
        return fluid.layers.multiclass_nms(
            bboxes=yolo_boxes,
            scores=yolo_scores,
            score_threshold=cfg.valid_thresh,
            nms_top_k=cfg.nms_topk,
            keep_top_k=cfg.nms_posk,
            nms_threshold=cfg.nms_thresh,
            background_label=-1,
            name="multiclass_nms")

    def get_pred(self):
        """Return the NMS-filtered detections."""
        return self._multiclass_nms()

    def get_map(self):
        """Return a ``detection_map`` op comparing NMS output with ground truth.

        Reads ``self.gtlabel`` and ``self.gtbox``, which ``build_input``
        only creates in the train and validation branches. As a side
        effect both attributes are rebound to their reshaped versions
        (``[-1, 1]`` and ``[-1, 4]``).
        """
        nmsed_out = self._multiclass_nms()

        # Labels are cast to the box dtype so both can be concatenated.
        self.gtlabel = fluid.layers.cast(
            x=self.gtlabel, dtype=self.gtbox.dtype)
        self.gtlabel = fluid.layers.reshape(self.gtlabel, [-1, 1])
        self.gtbox = fluid.layers.reshape(self.gtbox, [-1, 4])
        label = fluid.layers.concat([self.gtlabel, self.gtbox], axis=1)

        # NOTE(review): background_label is 0 here but -1 in the NMS op
        # above -- confirm that treating class 0 as background is intended.
        return fluid.layers.detection.detection_map(
            nmsed_out,
            label,
            background_label=0,
            class_num=cfg.class_num,
            overlap_threshold=0.5,
            evaluate_difficult=False,
            ap_version='integral')
10月中旬paddle1.6版本会正式发布yolov3在剪裁、蒸馏和量化策略上的示例。
我会将近期的工作同步到该issue, 多谢关注与支持。
10月中旬paddle1.6版本会正式发布yolov3在剪裁、蒸馏和量化策略上的示例。
我会将近期的工作同步到该issue, 多谢关注与支持。
现在 1.6 已经发布了,但我找不到关于 yolov3 的剪裁、量化的示例,请问已经可以使用了吗?
https://github.com/PaddlePaddle/models/issues/2350 当前根据官方的slim-ssd修改yolov3代码,遇到很多问题。“reader要改,模型结构要改,配置文件要改,保存结果也要改,需要修改不少源码。” 而且paddle的ssd和yolo的代码风格很不一样,改起来遇到很多问题。