ultralytics / yolov5

YOLOv5 🚀 in PyTorch > ONNX > CoreML > TFLite
https://docs.ultralytics.com
GNU Affero General Public License v3.0

mAP@0.5 differs from pycocotools #6666

Closed tenggyut closed 2 years ago

tenggyut commented 2 years ago

Search before asking

Question

I used this repo (v6.0) to train YOLOv5s on the VOC dataset (07+12 trainval as the training set, 07 test as the val set). After 30+ epochs, the best mAP@0.5 is about 0.828, as calculated by the eval function in val.py.

I then used the best checkpoint to run val.py and saved the predictions to txt files. I wrote a script that loads the ground truths and the predictions and evaluates them with pycocotools; the resulting mAP@0.5 is about 0.77, far below 0.828.

The command I used to save the predictions is: python val.py --data data/voc_rock.yaml --weights runs/train/exp16/weights/best.pt --conf-thres 0.01 --iou-thres 0.45 --save-txt --save-json --half --save-conf

The script I wrote to calculate the COCO metric is:

#encoding=utf8
from tqdm import tqdm
import os
import numpy as np
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
from PIL import Image
import xml.etree.ElementTree as ET
from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string("txt_root", None, 'pred txt file root')
flags.DEFINE_string("xml_root", None, 'gt xml root')
flags.DEFINE_string("img_root", None, 'img root')

def get_class_idx(label_mappings, class_name):
    # entries may contain comma-separated aliases
    for idx, name in enumerate(label_mappings):
        if class_name in name.split(','):
            return idx
    return -1

def load_voc_bboxs(anno_path, label_mappings):
    if os.stat(anno_path).st_size == 0:
        return []

    root = ET.parse(anno_path).getroot()

    bboxs = []
    for obj in root.iter('object'):
        # An ElementTree Element with no children is falsy, so compare
        # against None rather than relying on truthiness.
        difficult_node = obj.find('difficult')
        iscrowd_node = obj.find('iscrowd')

        difficult = difficult_node.text if difficult_node is not None else "0"
        iscrowd = iscrowd_node.text if iscrowd_node is not None else "0"

        iscrowd = iscrowd == "1" or iscrowd.lower() == 'true'
        difficult = difficult == "1" or difficult.lower() == 'true'

        if iscrowd:
            continue

        class_name = obj.find('name').text
        if label_mappings is not None:
            label = get_class_idx(label_mappings, class_name)
            assert label >= 0, (label_mappings, class_name)
        else:
            label = class_name

        xmlbox = obj.find('bndbox')

        xmin = max(0, int(float(xmlbox.find('xmin').text)))
        ymin = max(0, int(float(xmlbox.find('ymin').text)))
        xmax = int(float(xmlbox.find('xmax').text))
        ymax = int(float(xmlbox.find('ymax').text))

        if xmax - xmin <= 1 or ymax - ymin <= 1:
            continue

        # Note: the difficult flag is parsed but not returned, so difficult
        # ground truths are kept and later scored as ordinary objects.
        bboxs.append([xmin, ymin, xmax, ymax, label])
    return bboxs

def get_img_size(img_path):
    with Image.open(img_path) as img:
        width, height = img.size
    return (width, height)

def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC AP given precision and recall. If use_07_metric is true, uses
    the VOC 07 11-point method (default:False).
    """
    if use_07_metric:
        # 11 point metric
        ap = 0.0
        for t in np.arange(0.0, 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.0
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.0], rec, [1.0]))
        mpre = np.concatenate(([0.0], prec, [0.0]))

        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap

def voc_eval_per_class(coco_res, classname, ovthresh=0.5, use_07_metric=False):
    recs = {}
    imagenames = []
    splitlines_all = []
    for sample in coco_res:
        imagenames.append(sample['img_path'])
        recs[sample['img_path']] = []
        for gt in sample['gt_bboxes']:
            # difficult is hard-coded to 0 here, so difficult ground truths
            # are scored as ordinary positives (the root cause found below)
            recs[sample['img_path']].append({'name': gt[-1], 'bbox': gt[:4], 'difficult': 0})
        for pred in sample['pred_bboxes']:
            splitlines_all.append([sample['img_path'], pred[4]] + pred[:4] + [pred[-1]])

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj["name"] == classname]
        bbox = np.array([x["bbox"] for x in R])
        difficult = np.array([x["difficult"] for x in R]).astype(bool)  # np.bool was removed in NumPy 1.24
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det}

    splitlines = [x[0:6] for x in splitlines_all if x[-1] == classname]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4)

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R["bbox"].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1.0, 0.0)
            ih = np.maximum(iymax - iymin + 1.0, 0.0)
            inters = iw * ih

            # union
            uni = (
                (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0)
                + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0)
                - inters
            )

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R["difficult"][jmax]:
                if not R["det"][jmax]:
                    tp[d] = 1.0
                    R["det"][jmax] = 1
                else:
                    fp[d] = 1.0
        else:
            fp[d] = 1.0

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap

def eval_by_voc(coco_res, label_mappings, use_07_metric=False):
    aps = []
    for label_idx, _ in enumerate(label_mappings):
        aps_perclass = {}
        for thresh in range(50, 100, 5):
            rec, prec, ap = voc_eval_per_class(
                coco_res, label_idx,
                ovthresh=thresh / 100.0,
                use_07_metric=use_07_metric,
            )
            aps_perclass[thresh] = ap
        aps.append(aps_perclass)

    res = {}
    res['mAP'] = np.mean([x[50] for x in aps])
    res['mAP_IOU@0.5:0.95'] = np.mean([np.mean(list(x.values())) for x in aps])
    return res

class COCOME(COCO):
    """COCO subclass that builds its index from an in-memory dict
    instead of loading a JSON annotation file."""

    def __init__(self, annos_dict):
        super().__init__()
        self.dataset = annos_dict
        self.createIndex()

def eval_by_coco(coco_res, label_mappings):
    image_infos = []
    categories = []
    for idx, label_name in enumerate(label_mappings):
        categories.append(
            {'id': idx, 'name': label_name.split(',')[0], 'supercategory': ""})

    gt_annotations = []
    pred_annotations = []
    image_id = 0
    bbox_cnt = 0
    for datas in coco_res:
        w, h = get_img_size(datas['img_path'])

        image_infos.append({
            "file_name": datas['img_path'],
            "height": int(h),
            "width": int(w),
            "id": image_id
        })
        pred_bboxs = datas['pred_bboxes']
        gt_bboxs = datas['gt_bboxes']
        for gt_bbox in gt_bboxs:
            gt_annotations.append({
                "image_id": image_id,
                "category_id": int(gt_bbox[-1]),
                "bbox": [float(gt_bbox[0]), float(gt_bbox[1]), float(gt_bbox[2]) - float(gt_bbox[0]), float(gt_bbox[3]) - float(gt_bbox[1])],
                "score": "1.",
                "id": bbox_cnt,
                "area": (float(gt_bbox[2]) - float(gt_bbox[0])) * (float(gt_bbox[3]) - float(gt_bbox[1])),
                "iscrowd": 0,
            })
            bbox_cnt += 1
        for pred_bbox in pred_bboxs:
            pred_annotations.append({
                "image_id": image_id,
                "category_id": int(pred_bbox[-1]),
                "bbox": [float(pred_bbox[0]), float(pred_bbox[1]), float(pred_bbox[2]) - float(pred_bbox[0]), float(pred_bbox[3]) - float(pred_bbox[1])],
                "score": float('%.4f' % pred_bbox[4]),
                "id": bbox_cnt,
                "area": (float(pred_bbox[2]) - float(pred_bbox[0])) * (float(pred_bbox[3]) - float(pred_bbox[1])),
                "iscrowd": 0,
            })
            bbox_cnt += 1

        image_id += 1

    coco_gts = COCOME(annos_dict={'images': image_infos, 'annotations': gt_annotations, 'categories': categories})
    coco_dets = COCOME(annos_dict={'images': image_infos, 'annotations': pred_annotations, 'categories': categories})

    E = COCOeval(coco_gts, coco_dets, iouType='bbox')  # set iouType at construction
    E.evaluate()
    E.accumulate()
    E.summarize()
    res = {}
    res['mAP'] = E.stats[1]
    res['mAP_IOU@0.5:0.95'] = E.stats[0]
    return res

def load_preds(img_abs_path, txt_abs_path, label_mappings):
    if not os.path.isfile(txt_abs_path):
        return []
    img_w, img_h = get_img_size(img_abs_path)

    preds = []
    with open(txt_abs_path) as f:
        for line in f:
            line = line.strip()
            if line:
                # e.g. "11 0.383138 0.879687 0.361849 0.2125 0.76416"
                # format: cls cx cy w h conf (normalized xywh, from --save-txt --save-conf)
                cls_id, cx, cy, bw, bh, score = line.split()
                label_name = label_mappings[int(cls_id)]
                cx = float(cx)
                cy = float(cy)
                bw = float(bw)
                bh = float(bh)
                x1 = cx - bw / 2
                y1 = cy - bh / 2
                x2 = x1 + bw
                y2 = y1 + bh

                x1 = int(round(x1 * img_w))
                y1 = int(round(y1 * img_h))
                x2 = int(round(x2 * img_w))
                y2 = int(round(y2 * img_h))

                preds.append([x1, y1, x2, y2, float(score), int(cls_id)])
    return preds

def main(unargs):

    label_mappings = ['horse', 'person', 'bottle', 'dog', 'tvmonitor', 'car', 'aeroplane', 'bicycle', 'boat', 'chair', 'diningtable', 'pottedplant', 'train', 'cat', 'sofa', 'bird', 'sheep', 'motorbike', 'bus', 'cow']

    coco_res = []

    for img_file in tqdm(os.listdir(FLAGS.img_root)):
        if not img_file.endswith('.jpg'):
            continue
        img_abs_path = os.path.abspath(os.path.join(FLAGS.img_root, img_file))
        img_name, ext = os.path.splitext(os.path.basename(img_abs_path))
        txt_abs_path = os.path.join(FLAGS.txt_root, img_name + '.txt')
        xml_abs_path = os.path.join(FLAGS.xml_root, img_name + '.xml')

        gt_bboxes = load_voc_bboxs(xml_abs_path, label_mappings)
        pred_bboxes = load_preds(img_abs_path, txt_abs_path, label_mappings)

        coco_res.append({'img_path':img_abs_path, 'gt_bboxes': gt_bboxes, 'pred_bboxes': pred_bboxes})

    coco_metric = eval_by_coco(coco_res, label_mappings)
    print(coco_metric)
    voc_metric = eval_by_voc(coco_res, label_mappings)
    print(voc_metric)

if __name__ == '__main__':
    app.run(main)
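
The script defines three absl flags, so it can be invoked along these lines (eval_voc.py is a hypothetical filename; the example paths assume val.py wrote its --save-txt output to the default runs/val/exp/labels and a standard VOCdevkit layout):

python eval_voc.py \
    --txt_root runs/val/exp/labels \
    --xml_root VOCdevkit/VOC2007/Annotations \
    --img_root VOCdevkit/VOC2007/JPEGImages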

Also, I found a comment saying YOLOv5s can achieve 0.82 on the VOC 2007 test set.

So am I missing something here?

Additional

No response

glenn-jocher commented 2 years ago

@tenggyut we don't provide support for custom code.

Repo mAP is typically lower than pycocotools mAP; see https://github.com/ultralytics/yolov5/issues/2258

YOLOv5s on VOC reaches about 0.86 mAP@0.5: https://wandb.ai/glenn-jocher/VOC

tenggyut commented 2 years ago

Problem found! This repo ignores difficult ground truths, which my script does not. I'll close this issue.
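
For anyone reproducing this, a minimal sketch of the corresponding fix to the loader above: drop objects whose difficult flag is set, mirroring the repo's handling of VOC ground truths (the function name is hypothetical; the XML layout assumed is the same as in load_voc_bboxs):

import os
import xml.etree.ElementTree as ET

def load_voc_bboxs_skip_difficult(anno_path):
    # Like load_voc_bboxs above, but skips difficult objects so the custom
    # evaluation matches what the repo measures on VOC.
    if os.stat(anno_path).st_size == 0:
        return []
    bboxs = []
    for obj in ET.parse(anno_path).getroot().iter('object'):
        node = obj.find('difficult')
        # compare against None: an Element with no children is falsy
        if node is not None and (node.text or '').strip() in ('1', 'true', 'True'):
            continue  # drop difficult ground truths instead of scoring them
        box = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (int(float(box.find(k).text))
                                  for k in ('xmin', 'ymin', 'xmax', 'ymax'))
        bboxs.append([xmin, ymin, xmax, ymax, obj.find('name').text])
    return bboxs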

Yangly0 commented 2 years ago

Thanks, I get it.

NDTuong commented 2 years ago

Regarding the ~0.86 mAP@0.5 VOC result above: do you use VOC 2007, VOC 2012, or both?

glenn-jocher commented 11 months ago

@NDTuong we use VOC 2007, VOC 2012, and VOC 2007+2012 combined for training. More training details can be found in the Ultralytics Docs: https://docs.ultralytics.com/yolov5/.