hunglc007 / tensorflow-yolov4-tflite

YOLOv4, YOLOv4-tiny, YOLOv3, and YOLOv3-tiny implemented in TensorFlow 2.0 and Android. Convert YOLO v4 .weights to TensorFlow, TensorRT, and TFLite.
https://github.com/hunglc007/tensorflow-yolov4-tflite
MIT License

Error when transfer training with yolov4-tiny.weights #238

Open · leekyungchoon opened this issue 3 years ago

leekyungchoon commented 3 years ago

I tried transfer training based on yolov4-tiny.weights with the command below.

python train.py --weights ./data/yolov4-tiny.weights --tiny

But I am facing the error below.

Restoring weights from: ./data/yolov4-tiny.weights ...
Traceback (most recent call last):
  File "train.py", line 161, in <module>
    app.run(main)
  File "/home/kclee/.conda/envs/kclee01/lib/python3.7/site-packages/absl/app.py", line 300, in run
    _run_main(main, args)
  File "/home/kclee/.conda/envs/kclee01/lib/python3.7/site-packages/absl/app.py", line 251, in _run_main
    sys.exit(main(argv))
  File "train.py", line 153, in main
    for image_data, target in trainset:
  File "/home/kclee/tensorflow-yolov4-tflite/core/dataset.py", line 122, in __next__
    self.train_output_sizes[2],
IndexError: index 2 is out of bounds for axis 0 with size 2

Can anyone help me solve this error?
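For context, the crash is easy to reproduce in isolation: yolov4-tiny predicts at only two output scales, so self.strides has length 2, while the stock dataset.py unconditionally indexes a third scale. A minimal sketch (the stride values are assumed from the repo's config, [8, 16, 32] for the full model vs. [16, 32] for tiny):

import numpy as np

# Assumed from the repo's config: yolov4-tiny uses two strides, [16, 32].
strides = np.array([16, 32])
train_output_sizes = 416 // strides   # array([26, 13]) -> only 2 entries

# The stock dataset.py indexes a third scale unconditionally:
train_output_sizes[2]                 # IndexError: index 2 is out of bounds
                                      # for axis 0 with size 2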

yp19940913 commented 3 years ago

When I built the yolov4-tiny model, I encountered the same problem. Have you solved it by now? How did you do it? Thank you!!!!!!

leekyungchoon commented 3 years ago

When I built the yolov4-tiny model, I encountered the same problem. Have you solved it by now? How did you do it? Thank you!!!!!!

No, I have not yet.

leekyungchoon commented 3 years ago

When I built the yolov4-tiny model, I encountered the same problem. Have you solved it by now? How did you do it? Thank you!!!!!!

If you have not solved this error, I am sharing my code to support the yolov4-tiny model. I have updated the dataset.py file as below. Hope it helps you.

#!/usr/bin/env python
# coding=utf-8
import os
import cv2
import random
import numpy as np
import tensorflow as tf
import core.utils as utils
from core.config import cfg


class Dataset(object):
    """implement Dataset here"""

    def __init__(self, FLAGS, is_training: bool, dataset_type: str = "converted_coco"):
        self.tiny = FLAGS.tiny
        self.strides, self.anchors, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
        self.dataset_type = dataset_type

        self.annot_path = (
            cfg.TRAIN.ANNOT_PATH if is_training else cfg.TEST.ANNOT_PATH
        )
        self.input_sizes = (
            cfg.TRAIN.INPUT_SIZE if is_training else cfg.TEST.INPUT_SIZE
        )
        self.batch_size = (
            cfg.TRAIN.BATCH_SIZE if is_training else cfg.TEST.BATCH_SIZE
        )
        self.data_aug = cfg.TRAIN.DATA_AUG if is_training else cfg.TEST.DATA_AUG

        self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes = len(self.classes)
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
        self.max_bbox_per_scale = 150

        self.annotations = self.load_annotations()
        self.num_samples = len(self.annotations)
        self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
        self.batch_count = 0

    def load_annotations(self):
        with open(self.annot_path, "r") as f:
            txt = f.readlines()
            if self.dataset_type == "converted_coco":
                annotations = [
                    line.strip()
                    for line in txt
                    if len(line.strip().split()[1:]) != 0
                ]
            elif self.dataset_type == "yolo":
                annotations = []
                for line in txt:
                    image_path = line.strip()
                    root, _ = os.path.splitext(image_path)
                    with open(root + ".txt") as fd:
                        boxes = fd.readlines()
                        string = ""
                        for box in boxes:
                            box = box.strip()
                            box = box.split()
                            class_num = int(box[0])
                            center_x = float(box[1])
                            center_y = float(box[2])
                            half_width = float(box[3]) / 2
                            half_height = float(box[4]) / 2
                            string += " {},{},{},{},{}".format(
                                center_x - half_width,
                                center_y - half_height,
                                center_x + half_width,
                                center_y + half_height,
                                class_num,
                            )
                        annotations.append(image_path + string)

        np.random.shuffle(annotations)
        return annotations

    def __iter__(self):
        return self

    def __next__(self):
        with tf.device("/cpu:0"):
            # self.train_input_size = random.choice(self.train_input_sizes)
            self.train_input_size = cfg.TRAIN.INPUT_SIZE
            self.train_output_sizes = self.train_input_size // self.strides

            batch_image = np.zeros(
                (
                    self.batch_size,
                    self.train_input_size,
                    self.train_input_size,
                    3,
                ),
                dtype=np.float32,
            )

            # Allocate one label tensor and one padded-bbox tensor per output
            # scale, so both two scales (tiny) and three scales (full) work.
            batch_label_bboxes = []
            batch_bboxes = []
            for size in self.train_output_sizes:
                label_bbox = np.zeros(
                    (
                        self.batch_size,
                        size,
                        size,
                        self.anchor_per_scale,
                        5 + self.num_classes,
                    ),
                    dtype=np.float32,
                )
                batch_label_bboxes.append(label_bbox)
                batch_bbox = np.zeros(
                    (self.batch_size, self.max_bbox_per_scale, 4),
                    dtype=np.float32,
                )
                batch_bboxes.append(batch_bbox)

            num = 0
            if self.batch_count < self.num_batchs:
                while num < self.batch_size:
                    index = self.batch_count * self.batch_size + num
                    if index >= self.num_samples:
                        index -= self.num_samples
                    annotation = self.annotations[index]
                    image, bboxes = self.parse_annotation(annotation)
                    label_bboxes, bboxes = self.preprocess_true_boxes(
                        bboxes, num_anchors=len(self.train_output_sizes)
                    )

                    batch_image[num, :, :, :] = image
                    for batch_bbox, bbox in zip(batch_bboxes, bboxes):
                        batch_bbox[num, :, :] = bbox
                    for batch_label_bbox, label_bbox in zip(
                        batch_label_bboxes, label_bboxes
                    ):
                        batch_label_bbox[num, :, :, :] = label_bbox
                    num += 1
                self.batch_count += 1
                batch_targets = list(zip(batch_label_bboxes, batch_bboxes))

                return batch_image, batch_targets
            else:
                self.batch_count = 0
                np.random.shuffle(self.annotations)
                raise StopIteration

    def random_horizontal_flip(self, image, bboxes):
        if random.random() < 0.5:
            _, w, _ = image.shape
            image = image[:, ::-1, :]
            bboxes[:, [0, 2]] = w - bboxes[:, [2, 0]]

        return image, bboxes

    def random_crop(self, image, bboxes):
        if random.random() < 0.5:
            h, w, _ = image.shape
            max_bbox = np.concatenate(
                [
                    np.min(bboxes[:, 0:2], axis=0),
                    np.max(bboxes[:, 2:4], axis=0),
                ],
                axis=-1,
            )

            max_l_trans = max_bbox[0]
            max_u_trans = max_bbox[1]
            max_r_trans = w - max_bbox[2]
            max_d_trans = h - max_bbox[3]

            crop_xmin = max(
                0, int(max_bbox[0] - random.uniform(0, max_l_trans))
            )
            crop_ymin = max(
                0, int(max_bbox[1] - random.uniform(0, max_u_trans))
            )
            crop_xmax = max(
                w, int(max_bbox[2] + random.uniform(0, max_r_trans))
            )
            crop_ymax = max(
                h, int(max_bbox[3] + random.uniform(0, max_d_trans))
            )

            image = image[crop_ymin:crop_ymax, crop_xmin:crop_xmax]

            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin

        return image, bboxes

    def random_translate(self, image, bboxes):
        if random.random() < 0.5:
            h, w, _ = image.shape
            max_bbox = np.concatenate(
                [
                    np.min(bboxes[:, 0:2], axis=0),
                    np.max(bboxes[:, 2:4], axis=0),
                ],
                axis=-1,
            )

            max_l_trans = max_bbox[0]
            max_u_trans = max_bbox[1]
            max_r_trans = w - max_bbox[2]
            max_d_trans = h - max_bbox[3]

            tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
            ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))

            M = np.array([[1, 0, tx], [0, 1, ty]])
            image = cv2.warpAffine(image, M, (w, h))

            bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty

        return image, bboxes

    def parse_annotation(self, annotation):
        line = annotation.split()
        image_path = line[0]
        if not os.path.exists(image_path):
            raise KeyError("%s does not exist ... " % image_path)
        image = cv2.imread(image_path)
        if self.dataset_type == "converted_coco":
            bboxes = np.array(
                [list(map(int, box.split(","))) for box in line[1:]]
            )
        elif self.dataset_type == "yolo":
            height, width, _ = image.shape
            bboxes = np.array(
                [list(map(float, box.split(","))) for box in line[1:]]
            )
            bboxes = bboxes * np.array([width, height, width, height, 1])
            bboxes = bboxes.astype(np.int64)

        if self.data_aug:
            image, bboxes = self.random_horizontal_flip(
                np.copy(image), np.copy(bboxes)
            )
            image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
            image, bboxes = self.random_translate(
                np.copy(image), np.copy(bboxes)
            )

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image, bboxes = utils.image_preprocess(
            np.copy(image),
            [self.train_input_size, self.train_input_size],
            np.copy(bboxes),
        )
        return image, bboxes

    def preprocess_true_boxes(self, bboxes, num_anchors=3):
        # num_anchors is the number of output scales: 2 for yolov4-tiny,
        # 3 for the full model.
        label = [
            np.zeros(
                (
                    self.train_output_sizes[i],
                    self.train_output_sizes[i],
                    self.anchor_per_scale,
                    5 + self.num_classes,
                )
            )
            for i in range(num_anchors)
        ]
        bboxes_xywh = [
            np.zeros((self.max_bbox_per_scale, 4)) for _ in range(num_anchors)
        ]
        bbox_count = np.zeros((num_anchors,))

        for bbox in bboxes:
            bbox_coor = bbox[:4]
            bbox_class_ind = bbox[4]

            onehot = np.zeros(self.num_classes, dtype=float)
            onehot[bbox_class_ind] = 1.0
            uniform_distribution = np.full(
                self.num_classes, 1.0 / self.num_classes
            )
            deta = 0.01
            smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution

            bbox_xywh = np.concatenate(
                [
                    (bbox_coor[2:] + bbox_coor[:2]) * 0.5,
                    bbox_coor[2:] - bbox_coor[:2],
                ],
                axis=-1,
            )
            bbox_xywh_scaled = (
                1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]
            )

            iou = []
            exist_positive = False
            for i in range(num_anchors):
                anchors_xywh = np.zeros((self.anchor_per_scale, 4))
                anchors_xywh[:, 0:2] = (
                    np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
                )
                anchors_xywh[:, 2:4] = self.anchors[i]

                iou_scale = utils.bbox_iou(
                    bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh
                )
                iou.append(iou_scale)
                iou_mask = iou_scale > 0.3

                if np.any(iou_mask):
                    xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(
                        np.int32
                    )

                    label[i][yind, xind, iou_mask, :] = 0
                    label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
                    label[i][yind, xind, iou_mask, 4:5] = 1.0
                    label[i][yind, xind, iou_mask, 5:] = smooth_onehot

                    bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)
                    bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
                    bbox_count[i] += 1

                    exist_positive = True

            if not exist_positive:
                best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
                best_detect = int(best_anchor_ind / self.anchor_per_scale)
                best_anchor = int(best_anchor_ind % self.anchor_per_scale)
                xind, yind = np.floor(
                    bbox_xywh_scaled[best_detect, 0:2]
                ).astype(np.int32)

                label[best_detect][yind, xind, best_anchor, :] = 0
                label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
                label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
                label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot

                bbox_ind = int(
                    bbox_count[best_detect] % self.max_bbox_per_scale
                )
                bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
                bbox_count[best_detect] += 1
        # label_sbbox, label_mbbox, label_lbbox = label
        # sbboxes, mbboxes, lbboxes = bboxes_xywh
        return label, bboxes_xywh

    def __len__(self):
        return self.num_batchs
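A quick way to exercise the patched loader end to end (a minimal sketch; the SimpleNamespace is a hypothetical stand-in for the absl FLAGS object that train.py normally passes, and it assumes utils.load_config reads only FLAGS.tiny and FLAGS.model, with cfg.TRAIN.ANNOT_PATH pointing at a valid annotation file):

from types import SimpleNamespace

from core.dataset import Dataset

# Hypothetical stand-in for the absl FLAGS that train.py builds.
flags = SimpleNamespace(tiny=True, model="yolov4")

trainset = Dataset(flags, is_training=True)
image_batch, targets = next(iter(trainset))

print(image_batch.shape)   # (batch_size, input_size, input_size, 3)
print(len(targets))        # 2 scales for tiny, 3 for the full model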
yp19940913 commented 3 years ago

Thank you for your help and good luck!

ZZHHogan commented 3 years ago

I met the same problem too, and your solution fixed the bug. Thank you!!!

lipeng1109 commented 3 years ago

I replaced my code with the code above, but I still get this error. Have you made any other changes on your side?

ev1lQuark commented 3 years ago

Hello, I ran into the same problem, and the first answer in #223 solved it for me. In my case the cause was a problem with the *.name file for my own dataset; I suggest checking the .name file that __C.YOLO.CLASSES points to in config.py.
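To verify that quickly (a minimal sketch using the repo's own helpers; cfg.YOLO.CLASSES is the path configured in core/config.py):

from core.config import cfg
import core.utils as utils

# The label tensors are sized with 5 + num_classes, so the class count must
# match the names file referenced by __C.YOLO.CLASSES.
classes = utils.read_class_names(cfg.YOLO.CLASSES)
print(len(classes))   # should equal the number of classes in your dataset
print(classes)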