PaddlePaddle / models

Officially maintained, supported by PaddlePaddle, including CV, NLP, Speech, Rec, TS, big models and so on.
Apache License 2.0
6.91k stars 2.91k forks source link

yolov3能否增加slim的相应代码 #3396

Open mozpp opened 5 years ago

mozpp commented 5 years ago

https://github.com/PaddlePaddle/models/issues/2350 目前我参照官方的 slim-ssd 示例修改 yolov3 代码,遇到了很多问题:“reader要改,模型结构要改,配置文件要改,保存结果也要改,需要修改不少源码。”而且 paddle 的 ssd 和 yolo 的代码风格很不一样,改起来困难很多。

mozpp commented 5 years ago

@heavengate @wanghaoshuang

mozpp commented 5 years ago

我的做法(目前有bug) 修改train.py

# -*- coding: utf-8 -*-

#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os

def set_paddle_flags(flags):
    """Export each flag as an environment variable unless it already exists.

    Args:
        flags: mapping of environment-variable name to value. Values are
            converted with ``str`` before being stored.

    A name that is already present in ``os.environ`` is left untouched, so a
    value exported by the caller's shell wins over the one passed here.
    """
    for name, raw in flags.items():
        if name in os.environ:
            # keep whatever the user exported
            continue
        os.environ[name] = str(raw)

# Default Paddle runtime flags, applied at import time. Variables that are
# already set in the environment are not overridden (see set_paddle_flags).
# NOTE(review): this call sits above the `import paddle` lines; presumably the
# flags have to be in the environment before Paddle is imported -- confirm
# before moving it.
set_paddle_flags({
    'FLAGS_eager_delete_tensor_gb': 0,  # enable gc
    'FLAGS_memory_fraction_of_eager_deletion': 1,
    'FLAGS_fraction_of_gpu_memory_to_use': 0.8
})
import sys
import numpy as np
import random
import time
import shutil
from utility import parse_args, print_arguments, SmoothedValue

import paddle
import paddle.fluid as fluid
import reader
from models.yolov3 import YOLOv3
from learning_rate import exponential_with_warmup_decay
from config import cfg
from paddle.fluid.contrib.slim import Compressor

def build_program(main_prog, startup_prog, is_train):
    """Build the YOLOv3 graph inside ``main_prog`` / ``startup_prog``.

    Args:
        main_prog: program that receives the network operators.
        startup_prog: program that receives the initialisation operators.
        is_train: True builds the training graph (loss plus Momentum
            optimizer); False builds the evaluation graph (mAP plus NMS
            predictions) with ``YOLOv3(is_train=False, is_val=True)``.

    Returns:
        ``[py_reader, loss]`` when ``is_train`` is true, otherwise
        ``[py_reader, map, outputs]``.
    """
    with fluid.program_guard(main_prog, startup_prog):
        if not is_train:
            with fluid.unique_name.guard("inference"):
                net = YOLOv3(is_train=False, is_val=True)
                net.build_model()
                eval_reader = net.py_reader
                map_out = net.get_map()
                predictions = net.get_pred()
                return [eval_reader, map_out, predictions]

        with fluid.unique_name.guard("train"):
            net = YOLOv3()
            net.build_model()
            train_reader = net.py_reader
            total_loss = net.loss()
            total_loss.persistable = True

            base_lr = cfg.learning_rate
            decay = cfg.lr_gamma
            # one learning-rate value per interval delimited by cfg.lr_steps
            lr_values = [
                base_lr * (decay ** k) for k in range(len(cfg.lr_steps) + 1)
            ]
            lr_schedule = exponential_with_warmup_decay(
                learning_rate=base_lr,
                boundaries=cfg.lr_steps,
                values=lr_values,
                warmup_iter=cfg.warm_up_iter,
                warmup_factor=cfg.warm_up_factor)
            weight_decay = fluid.regularizer.L2Decay(cfg.weight_decay)
            momentum_opt = fluid.optimizer.Momentum(
                learning_rate=lr_schedule,
                regularization=weight_decay,
                momentum=cfg.momentum)
            momentum_opt.minimize(total_loss)
            return [train_reader, total_loss]

def train():
    """Build the train/eval programs, run slim compression and export.

    Steps, in order: optional seeding (``cfg.debug``), creation of
    ``cfg.model_save_dir``, graph construction through ``build_program``,
    parameter initialisation, optional loading of ``cfg.pretrain`` weights,
    reader wiring, a ``Compressor`` run configured by
    ``./compress/compress.yaml``, and saving the compressor's eval program
    to ``./compress/pruned_model/``.
    """
    if cfg.debug:
        # seed the default programs and the Python / NumPy generators
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    save_dir = cfg.model_save_dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    input_size = cfg.input_size

    # An unset or empty CUDA_VISIBLE_DEVICES still counts as one device,
    # because "".split(",") yields [""].
    visible = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(visible.split(","))
    print("Found {} CUDA devices.".format(devices_num))

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    eval_prog = fluid.Program()

    # Both graphs share one startup program.
    train_py_reader, loss = build_program(
        main_prog=train_prog, startup_prog=startup_prog, is_train=True)
    eval_py_reader, map_var, outputs = build_program(
        main_prog=eval_prog, startup_prog=startup_prog, is_train=False)

    eval_prog = eval_prog.clone(for_test=True)

    # list the conv parameters before compression
    for weight in train_prog.global_block().all_parameters():
        if 'conv' in weight.name:
            print(weight.name, weight.shape)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    pretrain_dir = cfg.pretrain
    if pretrain_dir:
        if not os.path.exists(pretrain_dir):
            # only reported; loading below then simply matches no variable
            print("Pretrain weights not found: {}".format(pretrain_dir))

        def if_exist(var):
            on_disk = os.path.exists(os.path.join(pretrain_dir, var.name))
            if cfg.class_num == 80:
                return on_disk
            # class_num differs from the pretrained model, so the weights of
            # the output layers are not loaded
            return on_disk and var.name.find('yolo_output') < 0

        fluid.io.load_vars(
            exe, pretrain_dir, main_program=train_prog, predicate=if_exist)

    eval_batches = reader.test(input_size, batch_size=cfg.batch_size)
    eval_py_reader.decorate_paddle_reader(eval_batches)

    if cfg.random_shape:
        random_sizes = [32 * k for k in range(10, 20)]
    else:
        random_sizes = [cfg.input_size]
    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter
    train_batches = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=True,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocessing=cfg.use_multiprocess)
    train_py_reader.decorate_paddle_reader(train_batches)

    # (display name, variable name) pairs fetched by the compressor
    train_fetch_list = [("loss", loss.name)]
    val_fetch_list = [("map", map_var.name)]
    compressor = Compressor(
        place,
        fluid.global_scope(),
        train_prog,
        train_reader=train_py_reader,
        train_feed_list=None,
        train_fetch_list=train_fetch_list,
        eval_program=eval_prog,
        eval_reader=eval_py_reader,
        eval_feed_list=None,
        eval_fetch_list=val_fetch_list,
        train_optimizer=None)
    compressor.config('./compress/compress.yaml')
    compressor.run()

    pruned_prog = compressor.eval_graph.program

    # NOTE(review): the eval graph is built with is_val=True, where the image
    # comes out of the py_reader and no variable named 'image' is declared by
    # YOLOv3.build_input -- verify these feed names exist in pruned_prog.
    fluid.io.save_inference_model(
        "./compress/pruned_model/",
        feeded_var_names=['image', 'im_shape'],
        target_vars=[outputs],
        executor=exe,
        main_program=pruned_prog)

    # check the shape of parameters
    for weight in pruned_prog.global_block().all_parameters():
        print("name: {}; shape: {}".format(weight.name, weight.shape))

if __name__ == '__main__':
    # Script entry point: parse the command-line options, hand them to
    # print_arguments, then start the compression run.
    # NOTE(review): `args` is not passed to train(), which reads `cfg`
    # instead; presumably parse_args() copies the options into `cfg` --
    # confirm in utility.py.
    args = parse_args()
    print_arguments(args)
    train()

修改yolov3.py

#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay

from config import cfg

from .darknet_ours import add_DarkNet53_conv_body
from .darknet_ours import conv_bn_layer
# from .darknet import add_DarkNet53_conv_body
# from .darknet import conv_bn_layer

def yolo_detection_block(input, channel, is_test=True, name=None):
    """Stack the conv_bn layers of one YOLO detection block.

    Two rounds of (1x1 filter with ``channel`` outputs, then 3x3 filter with
    ``channel * 2`` outputs) are applied to ``input``, followed by one more
    1x1 layer (``route``) and one more 3x3 layer on top of it (``tip``).

    Args:
        input: variable fed to the first layer.
        channel: output channels of the 1x1 layers; must be even.
        is_test: forwarded to every ``conv_bn_layer`` call.
        name: prefix for the layer names (``<name>.<j>.0``, ``<name>.<j>.1``,
            ``<name>.2`` and ``<name>.tip``).

    Returns:
        Tuple ``(route, tip)``.

    Raises:
        AssertionError: if ``channel`` is odd.
    """
    assert channel % 2 == 0, \
            "channel {} cannot be divided by 2".format(channel)

    def pointwise(feat, suffix):
        # 1x1 layer producing `channel` outputs
        return conv_bn_layer(feat, channel, filter_size=1,
                             stride=1, padding=0, is_test=is_test,
                             name='{}.{}'.format(name, suffix))

    def spatial(feat, suffix):
        # 3x3 layer producing `channel * 2` outputs
        return conv_bn_layer(feat, channel * 2, filter_size=3,
                             stride=1, padding=1, is_test=is_test,
                             name='{}.{}'.format(name, suffix))

    feat = input
    for idx in (0, 1):
        feat = pointwise(feat, '{}.0'.format(idx))
        feat = spatial(feat, '{}.1'.format(idx))

    route = pointwise(feat, '2')
    tip = spatial(route, 'tip')
    return route, tip

def upsample(input, scale=2,name=None):
    """Resize ``input`` with ``resize_nearest`` by a factor of ``scale``.

    The target size is computed inside the graph: entries 2 and 3 of the
    runtime shape of ``input`` are multiplied by ``scale`` and passed as
    ``actual_shape`` (assumes an NCHW layout, as the original local names
    suggested -- confirm against the caller).

    Args:
        input: variable to resize.
        scale: factor applied to the two sliced dimensions.
        name: optional name forwarded to ``resize_nearest``.

    Returns:
        The resized variable.
    """
    # dynamic output size, excluded from gradient computation
    runtime_shape = fluid.layers.shape(input)
    spatial_dims = fluid.layers.slice(
        runtime_shape, axes=[0], starts=[2], ends=[4])
    spatial_dims.stop_gradient = True
    target_size = fluid.layers.cast(spatial_dims, dtype='int32') * scale
    target_size.stop_gradient = True

    # resize by actual_shape
    return fluid.layers.resize_nearest(
        input=input,
        scale=scale,
        actual_shape=target_size,
        name=name)

class YOLOv3(object):
    """Graph builder for a YOLOv3 detector on top of a DarkNet53 body.

    The constructor flags select one of three modes:

    * ``is_train=True``: inputs come from a ``py_reader`` and
      ``build_model`` adds one ``yolov3_loss`` per output.
    * ``is_train=False, is_val=True``: image and ground truth come from a
      ``py_reader``; ``build_model`` decodes boxes so that ``get_map`` and
      ``get_pred`` can be used.
    * ``is_train=False, is_val=False``: the image is a plain feed variable
      named ``image``; only ``get_pred`` is usable.

    NOTE: ``build_model`` halves ``self.downsample`` once per output without
    resetting it, and ``self.losses`` is only initialised in ``__init__``, so
    an instance is meant to be built once.
    """

    def __init__(self,
                 is_train=True,
                 use_random=True,
                 is_val=False):
        self.is_train = is_train
        # stored only; not read anywhere in this class
        self.use_random = use_random
        # evaluation mode: ground truth is read together with the image
        self.is_val = is_val
        self.outputs = []
        self.losses = []
        # downsample ratio passed for the first output; halved per output
        self.downsample = 32

    def _build_gt_reader(self):
        """Create the py_reader yielding image, gt boxes, labels and scores."""
        self.py_reader = fluid.layers.py_reader(
            capacity=64,
            shapes=[[-1] + self.image_shape,
                    [-1, cfg.max_box_num, 4],
                    [-1, cfg.max_box_num],
                    [-1, cfg.max_box_num]],
            lod_levels=[0, 0, 0, 0],
            dtypes=['float32'] * 2 + ['int32'] + ['float32'],
            use_double_buffer=True)
        self.image, self.gtbox, self.gtlabel, self.gtscore = \
            fluid.layers.read_file(self.py_reader)

    def build_input(self):
        """Declare the input variables for the selected mode."""
        self.image_shape = [3, cfg.input_size, cfg.input_size]
        if self.is_train or self.is_val:
            self._build_gt_reader()
        else:
            self.image = fluid.layers.data(
                name='image', shape=self.image_shape, dtype='float32')

        if not self.is_train:
            # NOTE(review): in is_val mode the image is produced by the
            # py_reader while im_shape / im_id stay ordinary feed variables;
            # nothing visible here feeds them when the program is driven by
            # the reader alone -- confirm how they are supplied.
            self.im_shape = fluid.layers.data(
                name="im_shape", shape=[2], dtype='int32')
            self.im_id = fluid.layers.data(
                name="im_id", shape=[1], dtype='int32')

    def feeds(self):
        """Return the input variables: with im_id when not training."""
        if not self.is_train:
            return [self.image, self.im_id, self.im_shape]
        return [self.image, self.gtbox, self.gtlabel, self.gtscore]

    def feeds_0id(self):
        """Same as ``feeds`` but without ``im_id`` when not training."""
        if not self.is_train:
            return [self.image, self.im_shape]
        return [self.image, self.gtbox, self.gtlabel, self.gtscore]

    def _append_yolo_box(self, i, out, anchor_mask):
        """Decode output ``i`` into boxes/scores and store them on self."""
        mask_anchors = []
        for m in anchor_mask:
            # cfg.anchors is flat: two consecutive entries per anchor index
            mask_anchors.append(cfg.anchors[2 * m])
            mask_anchors.append(cfg.anchors[2 * m + 1])
        boxes, scores = fluid.layers.yolo_box(
            x=out,
            img_size=self.im_shape,
            anchors=mask_anchors,
            class_num=cfg.class_num,
            conf_thresh=cfg.valid_thresh,
            downsample_ratio=self.downsample,
            name="yolo_box" + str(i))
        self.boxes.append(boxes)
        self.scores.append(fluid.layers.transpose(scores, perm=[0, 2, 1]))

    def build_model(self):
        """Build inputs, backbone, detection heads and losses or decoders.

        Fills ``self.outputs`` with one head per backbone block, then either
        appends one reduced ``yolov3_loss`` per head to ``self.losses``
        (training) or one ``yolo_box`` result per head to ``self.boxes`` /
        ``self.scores`` (otherwise).
        """
        self.build_input()

        self.outputs = []
        self.boxes = []
        self.scores = []

        is_test = not self.is_train
        blocks = add_DarkNet53_conv_body(self.image, is_test)
        for i, block in enumerate(blocks):
            if i > 0:
                # merge the upsampled route of the previous level
                block = fluid.layers.concat(
                    input=[route, block],
                    axis=1)
            route, tip = yolo_detection_block(
                block,
                channel=512 // (2**i),
                is_test=is_test,
                name="yolo_block.{}".format(i))

            # out channel number = mask_num * (5 + class_num)
            num_filters = len(cfg.anchor_masks[i]) * (cfg.class_num + 5)
            block_out = fluid.layers.conv2d(
                input=tip,
                num_filters=num_filters,
                filter_size=1,
                stride=1,
                padding=0,
                act=None,
                param_attr=ParamAttr(
                    initializer=fluid.initializer.Normal(0., 0.02),
                    name="yolo_output.{}.conv.weights".format(i)),
                bias_attr=ParamAttr(
                    initializer=fluid.initializer.Constant(0.0),
                    regularizer=L2Decay(0.),
                    name="yolo_output.{}.conv.bias".format(i)))
            self.outputs.append(block_out)

            if i < len(blocks) - 1:
                route = conv_bn_layer(
                    input=route,
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=is_test,
                    name="yolo_transition.{}".format(i))
                # upsample
                route = upsample(route)

        for i, out in enumerate(self.outputs):
            anchor_mask = cfg.anchor_masks[i]

            if self.is_train:
                loss = fluid.layers.yolov3_loss(
                    x=out,
                    gt_box=self.gtbox,
                    gt_label=self.gtlabel,
                    gt_score=self.gtscore,
                    anchors=cfg.anchors,
                    anchor_mask=anchor_mask,
                    class_num=cfg.class_num,
                    ignore_thresh=cfg.ignore_thresh,
                    downsample_ratio=self.downsample,
                    use_label_smooth=cfg.label_smooth,
                    name="yolo_loss" + str(i))
                self.losses.append(fluid.layers.reduce_mean(loss))
            else:
                # validation and plain inference decode boxes identically
                self._append_yolo_box(i, out, anchor_mask)

            self.downsample //= 2

    def loss(self):
        """Return the sum of the per-output losses."""
        return sum(self.losses)

    def _nms(self):
        """Concatenate all decoded boxes/scores and apply multiclass NMS.

        Every call appends a fresh set of operators to the current program.
        """
        yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
        yolo_scores = fluid.layers.concat(self.scores, axis=2)
        return fluid.layers.multiclass_nms(
            bboxes=yolo_boxes,
            scores=yolo_scores,
            score_threshold=cfg.valid_thresh,
            nms_top_k=cfg.nms_topk,
            keep_top_k=cfg.nms_posk,
            nms_threshold=cfg.nms_thresh,
            background_label=-1,
            name="multiclass_nms")

    def get_pred(self):
        """Return the NMS-filtered detections."""
        return self._nms()

    def get_map(self):
        """Return the ``detection_map`` output for the NMS-filtered detections.

        Requires the ground-truth variables created in training or
        validation mode. The ground truth is flattened into rows of
        ``[label, box(4)]`` using local variables, so ``self.gtlabel`` and
        ``self.gtbox`` keep pointing at the reader outputs.
        """
        nmsed_out = self._nms()

        gt_label = fluid.layers.cast(x=self.gtlabel, dtype=self.gtbox.dtype)
        gt_label = fluid.layers.reshape(gt_label, [-1, 1])
        gt_box = fluid.layers.reshape(self.gtbox, [-1, 4])
        label = fluid.layers.concat([gt_label, gt_box], axis=1)

        # NOTE(review): NMS above runs with background_label=-1 while the mAP
        # op gets background_label=0; if class 0 is a real category it may be
        # left out of the metric -- confirm against the detection_map docs.
        return fluid.layers.detection.detection_map(
            nmsed_out,
            label,
            background_label=0,
            class_num=cfg.class_num,
            overlap_threshold=0.5,
            evaluate_difficult=False,
            ap_version='integral')
mozpp commented 5 years ago

Screenshot from 2019-09-24 15-13-24

wanghaoshuang commented 5 years ago

10月中旬paddle1.6版本会正式发布yolov3在剪裁、蒸馏和量化策略上的示例。

我会将近期的工作同步到该issue, 多谢关注与支持。

Ezra-Yu commented 5 years ago

> 10月中旬paddle1.6版本会正式发布yolov3在剪裁、蒸馏和量化策略上的示例。
>
> 我会将近期的工作同步到该issue, 多谢关注与支持。

现在1.6已经发布了,但我找不到关于yolov3的剪裁、量化的示例,请问已经可以使用了吗?