tusen-ai / simpledet

A Simple and Versatile Framework for Object Detection and Instance Recognition
Apache License 2.0
3.08k stars 486 forks source link

Single image detection #12

Open wait1988 opened 5 years ago

wait1988 commented 5 years ago

How can I run end-to-end (e2e) detection on a single image? Is there a quick example?

scutzhe commented 5 years ago

I have the same question.
Have you solved it?

xujingtju commented 5 years ago

Have you solved it? I do single-image inference with the following code:

import os
from core.detection_module import DetModule
from core.detection_input import Loader
from utils.load_model import load_checkpoint
from six.moves import reduce
from six.moves.queue import Queue
from threading import Thread
import argparse
import importlib
import mxnet as mx
import numpy as np
import six.moves.cPickle as pkl
import json
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import time
from collections import namedtuple
import cv2
def parse_args():
    """Parse the command line and import the detection config module.

    Command-line options:
        --config: path of the config file (required); converted to a dotted
            module name and imported.
        --test: path of the test file, returned unchanged.

    Returns:
        tuple: ``(config, test)`` where ``config`` is the imported config
        module and ``test`` is the raw ``--test`` value (``None`` if omitted).
    """
    parser = argparse.ArgumentParser(description='Test Detection')
    # general
    # --config is mandatory: without it there is nothing to import, and the
    # lookup below would otherwise fail with an AttributeError on None.
    parser.add_argument('--config', help='config file path', type=str, required=True)
    parser.add_argument('--test', help='test file path', type=str)
    args = parser.parse_args()

    # Turn a file path such as "config/foo.py" into the module name
    # "config.foo". Only a trailing ".py" is dropped, so a ".py" that happens
    # to appear in the middle of the path is left untouched.
    module_path = args.config
    if module_path.endswith('.py'):
        module_path = module_path[:-len('.py')]
    config = importlib.import_module(module_path.replace('/', '.'))
    return config, args.test

def read_img(img_path, short_side=800, long_side=2000):
    """Load an image and turn it into a batched CHW array for the network.

    The image is read with OpenCV, its channel order is reversed (BGR to
    RGB), and it is resized with a single scale factor chosen so that the
    shorter side does not exceed ``short_side`` and the longer side does not
    exceed ``long_side``.

    Args:
        img_path: Path of the image file.
        short_side: Target length of the shorter image side (default 800).
        long_side: Upper bound for the longer image side (default 2000).

    Returns:
        tuple: ``(image_ori, image, scale)`` where ``image_ori`` is the
        array returned by ``cv2.imread``, ``image`` is the resized image with
        shape ``(1, 3, H, W)`` and ``scale`` is the resize factor applied.

    Raises:
        IOError: If OpenCV cannot read ``img_path``.
    """
    image_ori = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if image_ori is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("cannot read image: %s" % img_path)
    #BGR2RGB
    image = image_ori[:, :, ::-1]
    print (image.shape)

    shorter = min(image.shape[:2])
    longer = max(image.shape[:2])
    # float() keeps this a true division under Python 2 as well; integer
    # division would truncate the scale to 0 or 1.
    scale = min(float(short_side) / shorter, float(long_side) / longer)
    image = cv2.resize(image, None, None, scale, scale, interpolation=cv2.INTER_LINEAR)
    #HWC2CHW
    image = image.transpose((2, 0, 1))
    image = np.expand_dims(image, axis=0)
    return image_ori, image, scale

def do_nms(all_output, thr, nms_fn=None, cat_ids=None):
    """Apply per-class score thresholding followed by NMS.

    Args:
        all_output: Mapping with ``'bbox_xyxy'`` (one row of box coordinates
            per candidate) and ``'cls_score'`` (one column per class).
        thr: Candidates whose class score is not greater than ``thr`` are
            discarded before NMS.
        nms_fn: Callable applied to the ``float32`` array of box columns plus
            a trailing score column. Defaults to the module-level ``nms``.
        cat_ids: Sequence mapping a class column index to the key used in the
            result. Defaults to ``coco.getCatIds()`` of the module-level
            ``coco`` object.

    Returns:
        dict: ``{category: detections}`` for every class that has at least
        one candidate above ``thr``; ``detections`` is whatever ``nms_fn``
        returns.
    """
    box = all_output['bbox_xyxy']
    score = all_output['cls_score']
    final_dets = {}
    for cid in range(score.shape[1]):
        score_cls = score[:, cid]
        valid_inds = np.where(score_cls > thr)[0]
        if valid_inds.shape[0] == 0:
            continue
        # Resolve the defaults lazily (and only once) so the module globals
        # are touched only when there is something to process, as before.
        if nms_fn is None:
            nms_fn = nms
        if cat_ids is None:
            cat_ids = coco.getCatIds()
        det = np.concatenate(
            (box[valid_inds], score_cls[valid_inds].reshape(-1, 1)), axis=1
        ).astype(np.float32)
        final_dets[cat_ids[cid]] = nms_fn(det)
    return final_dets

if __name__ == "__main__":
    # Script entry point: load the test config and checkpoint, bind the
    # detection module once, then run detection on every image path listed
    # (one per line) in the --test file and print the boxes scoring above thr.
    os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"

    config, testfile = parse_args()

    pGen, pKv, pRpn, pRoi, pBbox, pDataset, pModel, pOpt, pTest, \
    transform, data_name, label_name, metric_list = config.get_config(is_train=False)

    sym = pModel.test_symbol
    sym.save(pTest.model.prefix + "_test.json")
    # Read by do_nms() as a module global to map class columns to category ids.
    coco = COCO(pTest.coco.annotation)

    ctx = mx.gpu(0)
    arg_params, aux_params = load_checkpoint(pTest.model.prefix, pTest.model.epoch)
    mod = DetModule(sym, data_names=['data','im_info','im_id','rec_id'], context=ctx)
    mod.bind(data_shapes=[('data', (1, 3, 600, 899)), ('im_info', (1, 3)), ('im_id', (1,)), ('rec_id', (1,))], for_training=False)
    # The weights are the same for every image, so load them once here
    # instead of once per loop iteration.
    mod.set_params(arg_params, aux_params, allow_extra=False)

    # Also read by do_nms() as a module global.
    if callable(pTest.nms.type):
        nms = pTest.nms.type(pTest.nms.thr)
    else:
        from operator_py.nms import py_nms_wrapper
        nms = py_nms_wrapper(pTest.nms.thr)

    thr = 0.5
    with open(testfile, 'r') as rf:
        for line in rf:
            img_path = line.strip()
            if not img_path:
                # Blank lines (e.g. a trailing newline) are not image paths.
                continue
            im_ori, im_data, scale = read_img(img_path)
            h, w = im_data.shape[-2:]
            print (h,w,scale)
            im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
            data = mx.io.DataBatch(data=[mx.nd.array(im_data), mx.nd.array(im_info), mx.nd.array(im_id), mx.nd.array(rec_id)])
            mod.forward(data, is_train=False)
            output = [x.asnumpy() for x in mod.get_outputs()]
            # "out_im_id" rather than "id" so the builtin is not shadowed.
            rid, out_im_id, info, cls, box = [x.squeeze() for x in output]
            cls = cls[:, 1:]   # remove background
            # Map the boxes back to the coordinates of the original image.
            box = box / scale

            output_record = dict(
                rec_id=rid,
                im_id=out_im_id,
                im_info=info,
                bbox_xyxy=box,
                cls_score=cls,
            )
            all_outputs = pTest.process_output([output_record], None)
            final_result = do_nms(all_outputs[0], thr)
            for cid, bbox in final_result.items():
                idx = np.where(bbox[:, -1] > thr)[0]
                for i in idx:
                    final_box = bbox[i][:4]
                    score = bbox[i][-1]
                    print ("cls:%s bbox:%s score:%s"%(cid,final_box,score))
feixiangdekaka commented 5 years ago

raise MXNetError(py_str(_LIB.MXGetLastError())) mxnet.base.MXNetError: [17:38:51] include/mxnet/tuple.h:202: Check failed: i >= 0 && i < ndim(): index = 0 must be in range [0, -1)

vedrusss commented 5 years ago

Here is a Detector class I wrote, based on the code above:

import os, argparse
import importlib
import json
import time
import cv2
import numpy as np
import mxnet as mx
from   core.detection_module import DetModule
from   utils.load_model      import load_checkpoint

# Class-name lookup table used by TDNDetector.__do_nms: entry i is the label
# reported for column i of the score matrix after the background column has
# been removed. Presumably the 80 COCO category names in the order the model
# was trained with; verify against the dataset definition.
coco = (
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
)

class TDNDetector:
    """Single-image object detector built around a bound ``DetModule``.

    The detector is configured from a config module exposing
    ``get_config(is_train=False)``. Calling an instance with an image path
    runs one forward pass and returns the per-class detections whose score
    exceeds the configured threshold.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Load config and checkpoint, bind the module and save the symbol.

        Args:
            configFn: Path of the config file (e.g. ``config/foo.py``); it is
                converted to a dotted module name and imported.
            ctx: MXNet context passed to ``DetModule``.
            outFolder: Directory in which the test symbol JSON is written
                (created when missing).
            threshold: Minimum class score for a detection to be kept.
        """
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        config = importlib.import_module(configFn.replace('.py', '').replace('/', '.'))
        _,_,_,_,_,_, self.__pModel,_, self.__pTest,_,_,_,_ = config.get_config(is_train=False)
        # The config either supplies an NMS factory or we fall back to the
        # Python implementation shipped with the project.
        if callable(self.__pTest.nms.type):
            self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix, self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        self.__mod = DetModule(sym, data_names=['data','im_info','im_id','rec_id'], context=ctx)
        self.__mod.bind(data_shapes=[('data', (1, 3, 600, 899)),
                                     ('im_info', (1, 3)),
                                     ('im_id', (1,)),
                                     ('rec_id', (1,))], for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)
        self.__saveSymbol(sym, outFolder, self.__pTest.model.prefix.split('/')[-1])
        self.__threshold = threshold

    def __call__(self, imgFilename): # detect onto image
        """Run detection on one image file.

        Args:
            imgFilename: Path of the image to process.

        Returns:
            tuple: ``(detections, img)``. ``detections`` is a list of dicts
            with keys ``'cls'`` (class name), ``'box'`` (first four columns of
            the detection row) and ``'score'``; ``img`` is the image as
            loaded by OpenCV. ``(None, None)`` when the file cannot be read.
        """
        img, im_data, scale = self.__readImg(imgFilename)
        if img is None: return None, None
        h, w = im_data.shape[-2:]
        im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
        data = mx.io.DataBatch(data=[mx.nd.array(im_data),
                                     mx.nd.array(im_info),
                                     mx.nd.array(im_id),
                                     mx.nd.array(rec_id)])
        self.__mod.forward(data, is_train=False)
        # extract results
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        # "out_im_id" rather than "id" so the builtin is not shadowed.
        rid, out_im_id, info, cls, box = [x[0].asnumpy().squeeze() for x in outputs]
        cls = cls[:, 1:]   # remove background
        # Map the boxes back to the coordinates of the original image.
        box = box / scale
        output_record = dict(rec_id=rid, im_id=out_im_id, im_info=info, bbox_xyxy=box, cls_score=cls)
        output_record = self.__pTest.process_output([output_record], None)[0]
        final_result  = self.__do_nms(output_record)
        # obtain representable output
        detections = []
        for cid, bbox in final_result.items():
            # Re-check the score after NMS: the configured NMS callable is
            # opaque here and may have changed the score column.
            idx = np.where(bbox[:, -1] > self.__threshold)[0]
            for i in idx:
                detections.append({'cls': cid, 'box': bbox[i][:4], 'score': bbox[i][-1]})
        return detections, img

    def __do_nms(self, all_output):
        """Threshold the class scores and run NMS separately for each class.

        Args:
            all_output: Mapping with ``'bbox_xyxy'`` and ``'cls_score'``
                arrays (one score column per class).

        Returns:
            dict: class name -> output of the NMS callable, only for classes
            with at least one candidate above the threshold.
        """
        box   = all_output['bbox_xyxy']
        score = all_output['cls_score']
        final_dets = {}
        for cid in range(score.shape[1]):
            score_cls = score[:, cid]
            valid_inds = np.where(score_cls > self.__threshold)[0]
            if valid_inds.shape[0] == 0:
                continue
            det = np.concatenate(
                (box[valid_inds], score_cls[valid_inds].reshape(-1, 1)), axis=1
            ).astype(np.float32)
            final_dets[coco[cid]] = self.__nms(det)
        return final_dets

    def __readImg(self, imgFilename):
        """Read an image and prepare the ``(1, 3, H, W)`` network input.

        Returns:
            tuple: ``(image_ori, image, scale)``: the image as read by
            OpenCV, the resized channel-reversed CHW batch, and the resize
            factor. ``(None, None, None)`` when the file cannot be read.
        """
        image_ori = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        if image_ori is None: return None, None, None
        #BGR2RGB
        image = image_ori[:, :, ::-1]
        resizeParam   = (800, 2000)
        shorts, longs = min(image.shape[:2]), max(image.shape[:2])
        # float() keeps this a true division under Python 2 as well; integer
        # division would truncate the scale to 0 or 1.
        scale = min(float(resizeParam[0]) / shorts, float(resizeParam[1]) / longs)
        image = cv2.resize(image, None, None, scale, scale, interpolation=cv2.INTER_LINEAR)
        #HWC2CHW
        image = image.transpose((2, 0, 1))
        image = np.expand_dims(image, axis=0)
        return image_ori, image, scale

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Write ``sym`` to ``<outFolder>/<fnPrefix>_symbol_test.json``."""
        try:
            os.makedirs(outFolder)
        except OSError:
            # An already existing directory (possibly created concurrently)
            # is fine; anything else is a genuine error.
            if not os.path.isdir(outFolder):
                raise
        resFilename = os.path.join(outFolder, fnPrefix + "_symbol_test.json")
        sym.save(resFilename)

Use it as follows:

def parse_args():
    """Parse the detector demo's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace: with attributes ``config``, ``ctx``, ``inputs``,
        ``output`` and ``threshold``.
    """
    cli = argparse.ArgumentParser(description='Test Detection')
    # (flag, keyword arguments for add_argument), registered in this order.
    option_specs = (
        ('--config', dict(type=str, required=True, help='config file path')),
        ('--ctx', dict(type=int, default=0, help='GPU index. Set negative value to use CPU')),
        ('--inputs', dict(type=str, nargs='+', required=True, help='File(-s) to test')),
        ('--output', dict(type=str, default='results', help='Where to store results')),
        ('--threshold', dict(type=float, default=0.5, help='Detector threshold')),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()

if __name__ == "__main__":
    # Demo entry point: build one detector and run it on every input file.
    args = parse_args()
    # The CPU context comes from mxnet; the argparse namespace has no cpu()
    # attribute, so the original "args.cpu()" failed for a negative --ctx.
    ctx = mx.gpu(args.ctx) if args.ctx >= 0 else mx.cpu()
    imgFilenames = args.inputs
    detector = TDNDetector(args.config, ctx, args.output, args.threshold)
    for imgFilename in imgFilenames:
        # dets is None when the image could not be read.
        dets, img = detector(imgFilename)
Tveek commented 4 years ago

I made the above code more robust:

import os, argparse
import importlib
import json
import time
import cv2
import numpy as np
import mxnet as mx
from   core.detection_module import DetModule
from   utils.load_model      import load_checkpoint
from utils.patch_config import patch_config_as_nothrow

# Class-name lookup table used by TDNDetector.__do_nms: entry i is the label
# reported for column i of the score matrix after the background column has
# been removed. Presumably the 80 COCO category names in the order the model
# was trained with; verify against the dataset definition.
coco = (
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
)

class TDNDetector:
    """Single-image object detector that reuses the config's transform chain.

    The detector is configured from a config module exposing
    ``get_config(is_train=False)``. Images are pre-processed by applying the
    config's ``transform`` objects to a minimal record, then passed through a
    bound ``DetModule``; per-class detections above the score threshold are
    returned.

    Attributes:
        transform: Transform objects taken from the config; each is applied
            to the image record via ``apply``.
        resizeParam: ``(800, 1200)``, used both for the bound input shape and
            for computing the scale reported to the network.
    """

    def __init__(self, configFn, ctx, outFolder, threshold):
        """Load config and checkpoint, bind the module and save the symbol.

        Args:
            configFn: Path of the config file (e.g. ``config/foo.py``); it is
                converted to a dotted module name and imported.
            ctx: MXNet context passed to ``DetModule``.
            outFolder: Directory in which the test symbol JSON is written
                (created when missing).
            threshold: Minimum class score for a detection to be kept.
        """
        os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0"
        config = importlib.import_module(configFn.replace('.py', '').replace('/', '.'))
        _,_,_,_,_,_, self.__pModel,_, self.__pTest, self.transform,_,_,_ = config.get_config(is_train=False)
        self.__pModel = patch_config_as_nothrow(self.__pModel)
        self.__pTest = patch_config_as_nothrow(self.__pTest)
        self.resizeParam = (800, 1200)
        # The config either supplies an NMS factory or we fall back to the
        # Python implementation shipped with the project.
        if callable(self.__pTest.nms.type):
            self.__nms = self.__pTest.nms.type(self.__pTest.nms.thr)
        else:
            from operator_py.nms import py_nms_wrapper
            self.__nms = py_nms_wrapper(self.__pTest.nms.thr)
        arg_params, aux_params = load_checkpoint(self.__pTest.model.prefix, self.__pTest.model.epoch)
        sym = self.__pModel.test_symbol
        from utils.graph_optimize import merge_bn
        sym, arg_params, aux_params = merge_bn(sym, arg_params, aux_params)
        self.__mod = DetModule(sym, data_names=['data','im_info','im_id','rec_id'], context=ctx)
        self.__mod.bind(data_shapes=[('data', (1, 3, self.resizeParam[0], self.resizeParam[1])),
                                     ('im_info', (1, 3)),
                                     ('im_id', (1,)),
                                     ('rec_id', (1,))], for_training=False)
        self.__mod.set_params(arg_params, aux_params, allow_extra=False)
        self.__saveSymbol(sym, outFolder, self.__pTest.model.prefix.split('/')[-1])
        self.__threshold = threshold

    def __call__(self, imgFilename): # detect onto image
        """Run detection on one image file.

        Args:
            imgFilename: Path of the image to process.

        Returns:
            tuple: ``(detections, None)``. ``detections`` is a list of dicts
            with keys ``'cls'`` (class name), ``'box'`` (first four columns of
            the detection row) and ``'score'``. ``(None, None)`` when the
            image cannot be read.
        """
        roi_record, scale = self.__readImg(imgFilename)
        if roi_record is None:
            return None, None
        h, w = roi_record['data'][0].shape

        # Stack the three channel planes into a (1, 3, h, w) batch.
        im_data = np.concatenate(
            [roi_record['data'][c].reshape(1, 1, h, w) for c in range(3)], axis=1)

        im_info, im_id, rec_id = [(h, w, scale)], [1], [1]
        data = mx.io.DataBatch(data=[mx.nd.array(im_data),
                                     mx.nd.array(im_info),
                                     mx.nd.array(im_id),
                                     mx.nd.array(rec_id)])
        self.__mod.forward(data, is_train=False)
        # extract results
        outputs = self.__mod.get_outputs(merge_multi_context=False)
        # "out_im_id" rather than "id" so the builtin is not shadowed.
        rid, out_im_id, info, cls, box = [x[0].asnumpy().squeeze() for x in outputs]
        cls = cls[:, 1:]   # remove background
        # Map the boxes back to the coordinates of the original image.
        box = box / scale
        output_record = dict(rec_id=rid, im_id=out_im_id, im_info=info, bbox_xyxy=box, cls_score=cls)
        output_record = self.__pTest.process_output([output_record], None)[0]
        final_result  = self.__do_nms(output_record)
        # obtain representable output
        detections = []
        for cid, bbox in final_result.items():
            # Re-check the score after NMS: the configured NMS callable is
            # opaque here and may have changed the score column.
            idx = np.where(bbox[:, -1] > self.__threshold)[0]
            for i in idx:
                detections.append({'cls': cid, 'box': bbox[i][:4], 'score': bbox[i][-1]})
        return detections, None

    def __do_nms(self, all_output):
        """Threshold the class scores and run NMS separately for each class.

        Args:
            all_output: Mapping with ``'bbox_xyxy'`` and ``'cls_score'``
                arrays (one score column per class).

        Returns:
            dict: class name -> output of the NMS callable, only for classes
            with at least one candidate above the threshold.
        """
        box   = all_output['bbox_xyxy']
        score = all_output['cls_score']
        final_dets = {}
        for cid in range(score.shape[1]):
            score_cls = score[:, cid]
            valid_inds = np.where(score_cls > self.__threshold)[0]
            if valid_inds.shape[0] == 0:
                continue
            det = np.concatenate(
                (box[valid_inds], score_cls[valid_inds].reshape(-1, 1)), axis=1
            ).astype(np.float32)
            final_dets[coco[cid]] = self.__nms(det)
        return final_dets

    def __readImg(self, imgFilename):
        """Build the image record and run the config's transforms over it.

        Returns:
            tuple: ``(roi_record, scale)``: the record after all transforms
            were applied and the resize factor derived from ``resizeParam``.
            ``(None, None)`` when OpenCV cannot read the file.
        """
        img = cv2.imread(imgFilename, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            return None, None
        height, width = img.shape[:2]
        # Dummy ground truth: the record only needs these keys to exist.
        roi_record = {'gt_bbox': np.array([[0., 0., 0., 0.]]),'gt_class': np.array([0])}
        roi_record['image_url'] = imgFilename
        roi_record['h'] = height
        roi_record['w'] = width

        for trans in self.transform:
            trans.apply(roi_record)
        # NOTE(review): assumes the transforms leave 'h'/'w' at the original
        # image size and resize with the same limits as resizeParam; confirm
        # against the config's resize settings.
        img_shape = [roi_record['h'], roi_record['w']]
        shorts, longs = min(img_shape), max(img_shape)
        # float() keeps this a true division under Python 2 as well.
        scale = min(float(self.resizeParam[0]) / shorts, float(self.resizeParam[1]) / longs)

        return roi_record, scale

    def __saveSymbol(self, sym, outFolder, fnPrefix):
        """Write ``sym`` to ``<outFolder>/<fnPrefix>_symbol_test.json``."""
        try:
            os.makedirs(outFolder)
        except OSError:
            # An already existing directory (possibly created concurrently)
            # is fine; anything else is a genuine error.
            if not os.path.isdir(outFolder):
                raise
        resFilename = os.path.join(outFolder, fnPrefix + "_symbol_test.json")
        sym.save(resFilename)

Use it as follows:

import mxnet as mx
import argparse
from infer import TDNDetector

def parse_args():
    """Parse the detector demo's command-line options from ``sys.argv``.

    Every option has a default, so the script can be started without any
    arguments.

    Returns:
        argparse.Namespace: with attributes ``config``, ``ctx``, ``output``
        and ``threshold``.
    """
    cli = argparse.ArgumentParser(description='Test Detection')
    # (flag, keyword arguments for add_argument), registered in this order.
    option_specs = (
        ('--config', dict(type=str, default='config/faster_r101v2c4_c5_256roi_1x.py', help='config file path')),
        ('--ctx', dict(type=int, default=0, help='GPU index. Set negative value to use CPU')),
        # Disabled option, kept for reference:
        # ('--inputs', dict(type=str, nargs='+', required=True, default='', help='File(-s) to test')),
        ('--output', dict(type=str, default='results', help='Where to store results')),
        ('--threshold', dict(type=float, default=0.5, help='Detector threshold')),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()

if __name__ == "__main__":
    # Demo entry point: build one detector and print the detections for a
    # fixed list of sample images.
    args = parse_args()
    # The CPU context comes from mxnet; the argparse namespace has no cpu()
    # attribute, so the original "args.cpu()" failed for a negative --ctx.
    ctx = mx.gpu(args.ctx) if args.ctx >= 0 else mx.cpu()
    #imgFilenames = args.inputs
    imgFilenames = ['car.jpg', 'COCO_val2014_000000581929.jpg']
    detector = TDNDetector(args.config, ctx, args.output, args.threshold)
    for imgFilename in imgFilenames:
        print(imgFilename)
        dets, _ = detector(imgFilename)
        print(dets)
louielu1027 commented 4 years ago

@Tveek Hi, thanks for your code! But how do I run a multi-scale test on one image? I want to use tridentnet_r101v2c4_c5_multiscale_addminival_3x_fp16.py for training and testing.

louielu1027 commented 4 years ago

@xujingtju @vedrusss Hi, how do I run a multi-scale test on images? Thank you very much!

tiberium24 commented 4 years ago

How can you load a model exactly from Model Zoo? Or any other pretrained?

mad-fogs commented 4 years ago

@xujingtju, thank you very much for your code; the other code posted in this issue all returned errors in my environment. Based on yours, I wrote a batch test demo. Many thanks.