Hello everyone,
I'm currently having a problem deploying an object detection model trained with NVIDIA DIGITS 6.0.
NVcaffe version: 0.15.14
The Python code I'm using (adapted from pyimagesearch):
# import the necessary packages
import argparse
import math

import cv2
import numpy as np
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
    help="path to input image")
ap.add_argument("-p", "--prototxt", required=True,
    help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
    help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# initialize the list of class labels the network was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ["Carrefour"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# Number of box slots reserved per image in the ClusterDetections output.
# The layer code further down (getMemoryShapes and cluster) reads this name,
# but the script never defined it, which is a NameError at inference time.
# NOTE(review): 50 is assumed to match NVIDIA's reference DetectNet
# clustering code -- confirm against the code this layer was copied from.
MAX_BOXES = 50
# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
class ClusterDetections(object):
    """Python implementation of the 'ClusterDetections' custom DNN layer.

    The layer hands the network's first input (sliced per class) and its
    second input to ``cluster()`` and returns one fixed-size box array per
    class.
    """

    def __init__(self, params, blobs):
        """Parse the layer configuration from ``params['param_str']``.

        The string is comma-separated, in this order: image_size_x,
        image_size_y, stride, gridbox_cvg_threshold, gridbox_rect_thresh,
        gridbox_rect_eps, min_height and, optionally, num_classes
        (defaults to 1 when absent).

        ``blobs`` is accepted but not used.

        Raises:
            ValueError: if ``param_str`` is missing, too short, or holds a
                value that cannot be converted to the expected number type.
        """
        self.is_groundtruth = False
        try:
            plist = params['param_str'].split(',')
            self.image_size_x = int(plist[0])
            self.image_size_y = int(plist[1])
            self.stride = int(plist[2])
            self.gridbox_cvg_threshold = float(plist[3])
            self.gridbox_rect_thresh = int(plist[4])
            self.gridbox_rect_eps = float(plist[5])
            self.min_height = int(plist[6])
            self.num_classes = int(plist[7]) if len(plist) > 7 else 1
        except (KeyError, IndexError, ValueError):
            # A missing key or a too-short list is also a "missing parameter
            # string"; report it the same way as a bad number.
            raise ValueError("Parameter string missing or data type is wrong!")

    def getMemoryShapes(self, bottom):
        """Return the output shapes for the given input shapes.

        ``bottom`` is a list of input shapes; ``bottom[0][0]`` is read as the
        number of images and ``bottom[0][1]`` as the number of classes.

        Returns:
            One ``[1, n_images, MAX_BOXES, 5]`` shape per class.

        Raises:
            ValueError: if the class count in ``bottom[0]`` differs from the
                configured ``num_classes``.
        """
        n_images = bottom[0][0]
        num_classes = bottom[0][1]
        if num_classes != self.num_classes:
            # bottom[0] is already the shape (a plain sequence of ints), so
            # it is printed directly; it has no ``.data`` attribute.
            raise ValueError(
                "Unexpected number of classes: %d != %d, bottom[0] shape=%s"
                % (num_classes, self.num_classes, repr(bottom[0])))
        # Each output is made 4-dimensional.
        return [[1, n_images, MAX_BOXES, 5] for _ in range(num_classes)]

    def forward(self, bottom):
        """Cluster each class channel of ``bottom[0]`` into boxes.

        For every class index ``i`` the single-channel slice
        ``bottom[0][:, i:i+1]`` and the whole of ``bottom[1]`` are passed to
        ``cluster()``; the result is cast to float32 and given a leading
        axis of length 1.

        Returns:
            A list with one array per class.
        """
        top = []
        for i in range(self.num_classes):
            data0 = bottom[0][:, i:i + 1, :, :]
            bbox = cluster(self, data0, bottom[1])
            top.append(np.expand_dims(bbox.astype(np.float32), 0))
        return top
# Register the Python class above as the implementation of the
# 'ClusterDetections' layer type (presumably the type name used in the
# deploy prototxt -- verify).
# NOTE(review): the network is read from disk earlier in this script, before
# this registration; OpenCV's custom-layer samples register the layer before
# importing the model -- confirm this order works for the Caffe importer.
cv2.dnn_registerLayer('ClusterDetections', ClusterDetections)
# load the input image and construct an input blob for the image
# by resizing to a fixed 300x300 pixels and then normalizing it
# (note: normalization is done via the authors of the MobileNet SSD
# implementation)
image = cv2.imread(args["image"])
# cv2.imread signals an unreadable or missing file by returning None rather
# than raising, so fail here with a message that names the path.
if image is None:
    raise IOError("could not read input image: {}".format(args["image"]))
(h, w) = image.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5)
class ClusterDetections(object):
    """Python implementation of the 'ClusterDetections' custom DNN layer.

    The layer hands the network's first input (sliced per class) and its
    second input to ``cluster()`` and returns one fixed-size box array per
    class.
    """

    def __init__(self, params, blobs):
        """Parse the layer configuration from ``params['param_str']``.

        The string is comma-separated, in this order: image_size_x,
        image_size_y, stride, gridbox_cvg_threshold, gridbox_rect_thresh,
        gridbox_rect_eps, min_height and, optionally, num_classes
        (defaults to 1 when absent).

        ``blobs`` is accepted but not used.

        Raises:
            ValueError: if ``param_str`` is missing, too short, or holds a
                value that cannot be converted to the expected number type.
        """
        self.is_groundtruth = False
        try:
            plist = params['param_str'].split(',')
            self.image_size_x = int(plist[0])
            self.image_size_y = int(plist[1])
            self.stride = int(plist[2])
            self.gridbox_cvg_threshold = float(plist[3])
            self.gridbox_rect_thresh = int(plist[4])
            self.gridbox_rect_eps = float(plist[5])
            self.min_height = int(plist[6])
            self.num_classes = int(plist[7]) if len(plist) > 7 else 1
        except (KeyError, IndexError, ValueError):
            # A missing key or a too-short list is also a "missing parameter
            # string"; report it the same way as a bad number.
            raise ValueError("Parameter string missing or data type is wrong!")

    def getMemoryShapes(self, bottom):
        """Return the output shapes for the given input shapes.

        ``bottom`` is a list of input shapes; ``bottom[0][0]`` is read as the
        number of images and ``bottom[0][1]`` as the number of classes.

        Returns:
            One ``[1, n_images, MAX_BOXES, 5]`` shape per class.

        Raises:
            ValueError: if the class count in ``bottom[0]`` differs from the
                configured ``num_classes``.
        """
        n_images = bottom[0][0]
        num_classes = bottom[0][1]
        if num_classes != self.num_classes:
            # bottom[0] is already the shape (a plain sequence of ints), so
            # it is printed directly; it has no ``.data`` attribute.
            raise ValueError(
                "Unexpected number of classes: %d != %d, bottom[0] shape=%s"
                % (num_classes, self.num_classes, repr(bottom[0])))
        # Each output is made 4-dimensional.
        return [[1, n_images, MAX_BOXES, 5] for _ in range(num_classes)]

    def forward(self, bottom):
        """Cluster each class channel of ``bottom[0]`` into boxes.

        For every class index ``i`` the single-channel slice
        ``bottom[0][:, i:i+1]`` and the whole of ``bottom[1]`` are passed to
        ``cluster()``; the result is cast to float32 and given a leading
        axis of length 1.

        Returns:
            A list with one array per class.
        """
        top = []
        for i in range(self.num_classes):
            data0 = bottom[0][:, i:i + 1, :, :]
            bbox = cluster(self, data0, bottom[1])
            top.append(np.expand_dims(bbox.astype(np.float32), 0))
        return top
def gridbox_to_boxes(net_cvg, net_boxes, self):
    """Convert one image's coverage grid and box grids into box proposals.

    Args:
        net_cvg: array indexed as ``net_cvg[0, row, col]``; only the first
            ``image_size_y // stride`` rows and ``image_size_x // stride``
            columns are read.
        net_boxes: four grids indexed as ``net_boxes[k][row][col]``; grids
            0..3 are added to the cell origin to give x1, y1, x2, y2.
        self: object providing ``image_size_x``, ``image_size_y``,
            ``stride``, ``is_groundtruth`` and ``gridbox_cvg_threshold``.

    Returns:
        ``(boxes, cvgs, mask)`` where ``boxes`` has one ``[x1, y1, x2, y2]``
        row per selected cell, ``cvgs`` has one ``[col, row, coverage]`` row
        per selected cell, and ``mask`` is the boolean grid of selected
        cells.
    """
    im_sz_x = self.image_size_x
    im_sz_y = self.image_size_y
    stride = self.stride

    grid_sz_x = int(im_sz_x / stride)
    grid_sz_y = int(im_sz_y / stride)

    cell_width = im_sz_x / grid_sz_x
    cell_height = im_sz_y / grid_sz_y

    cvg_val = net_cvg[0, 0:grid_sz_y, 0:grid_sz_x]

    # Ground truth keeps every covered cell; predictions must reach the
    # configured coverage threshold.
    if self.is_groundtruth:
        mask = cvg_val > 0
    else:
        mask = cvg_val >= self.gridbox_cvg_threshold

    # Row (y) and column (x) indices of the selected cells.
    y, x = np.nonzero(mask)

    # Top-left corner of each selected cell in image pixels.
    mx = x * cell_width
    my = y * cell_height

    # Index all selected cells at once instead of one Python-level lookup
    # per cell; asarray keeps nested-list inputs working.
    offsets = np.asarray(net_boxes)
    x1 = offsets[0][y, x] + mx
    y1 = offsets[1][y, x] + my
    x2 = offsets[2][y, x] + mx
    y2 = offsets[3][y, x] + my

    boxes = np.transpose(np.vstack((x1, y1, x2, y2)))
    cvgs = np.transpose(np.vstack((x, y, cvg_val[y, x])))
    return boxes, cvgs, mask
def vote_boxes(propose_boxes, propose_cvgs, mask, self):
    """Vote amongst the boxes using OpenCV's built-in clustering routine.

    Args:
        propose_boxes: array with one ``[x1, y1, x2, y2]`` row per proposal.
        propose_cvgs: accepted for interface compatibility; not used.
        mask: accepted for interface compatibility; not used.
        self: object providing ``gridbox_rect_thresh``, ``gridbox_rect_eps``
            and ``min_height``.

    Returns:
        A list of ``[x1, y1, x2, y2, confidence]`` detections, where
        confidence is the natural log of the group's weight. The list is
        empty when ``propose_boxes`` has no non-zero entry.
    """
    detections_per_image = []
    if not propose_boxes.any():
        return detections_per_image

    ######################################################################
    # GROUP RECTANGLES Clustering
    ######################################################################
    # groupRectangles works on (x, y, width, height) rectangles with integer
    # coordinates, so convert the corners and truncate explicitly rather
    # than handing floats to the binding.
    rects = [
        [int(e[0]), int(e[1]), int(e[2] - e[0]), int(e[3] - e[1])]
        for e in np.array(propose_boxes).tolist()
    ]
    nboxes, weights = cv2.groupRectangles(
        rects,
        self.gridbox_rect_thresh,
        self.gridbox_rect_eps)

    for rect, weight in zip(nboxes, weights):
        if rect[3] >= self.min_height:
            # ravel() accepts a weight that arrives either as a scalar or as
            # a one-element array.
            confidence = math.log(np.ravel(weight)[0])
            detection = [rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3], confidence]
            detections_per_image.append(detection)

    return detections_per_image
def cluster(self, net_cvg, net_boxes):
    """Read output of inference and turn it into bounding boxes.

    Args:
        self: layer object providing ``is_groundtruth`` plus the attributes
            read by ``gridbox_to_boxes`` and ``vote_boxes``.
        net_cvg: per-image coverage data; ``net_cvg.shape[0]`` is the batch
            size.
        net_boxes: per-image box data, indexed by image.

    Returns:
        A zero-initialised ``[batch_size, MAX_BOXES, 5]`` array whose leading
        rows for each image hold that image's boxes. At most ``MAX_BOXES``
        boxes are kept per image.
    """
    batch_size = net_cvg.shape[0]
    boxes = np.zeros([batch_size, MAX_BOXES, 5])

    for i in range(batch_size):
        # Gather proposals that pass a threshold (needed on both paths).
        propose_boxes, propose_cvgs, mask = gridbox_to_boxes(
            net_cvg[i], net_boxes[i], self)

        if self.is_groundtruth:
            # Remove duplicates from ground truth
            rows = list({tuple(row) for row in propose_boxes})
        else:
            # Vote across the proposals to get bboxes
            rows = vote_boxes(propose_boxes, propose_cvgs, mask, self)

        # float16 is kept from the original code.
        # NOTE(review): integers above 2048 are not exactly representable in
        # float16 -- confirm this is acceptable for the image sizes in use.
        boxes_cur_image = np.asarray(rows, dtype=np.float16)

        if boxes_cur_image.shape[0] != 0:
            r, c = boxes_cur_image.shape
            # The output has only MAX_BOXES rows per image; keep the first
            # MAX_BOXES instead of failing on a shape mismatch.
            r = min(r, MAX_BOXES)
            boxes[i, 0:r, 0:c] = boxes_cur_image[:r]

    # Debug output retained from the original script.
    print(boxes)
    return boxes
# The module is imported as ``cv2`` in this script; ``cv`` is not defined.
# 'ClusterDetections' is already registered earlier in this script, and
# registering the same layer type again without removing it first is
# rejected by OpenCV, so drop the earlier registration before pointing the
# layer type at the class defined just above.
cv2.dnn_unregisterLayer('ClusterDetections')
cv2.dnn_registerLayer('ClusterDetections', ClusterDetections)
# pass the blob through the network and obtain the detections and
# predictions
print("[INFO] computing object detections...")
net.setInput(blob)
detections = net.forward()
# Rows with 5 values are treated as ClusterDetections output
# ([x1, y1, x2, y2, score], the layout built by vote_boxes/cluster in this
# file). Any other width keeps the original SSD parsing
# ([_, class index, confidence, x1, y1, x2, y2] with normalized corners),
# which cannot be applied to 5-value rows.
is_cluster_output = detections.shape[3] == 5
# NOTE(review): cluster boxes are assumed to be in pixels of the network
# input (the blob), so they are rescaled to the original image size --
# confirm that image_size_x/y in the layer's param_str match the blob size.
cluster_scale = np.array(
    [w / float(blob.shape[3]), h / float(blob.shape[2])] * 2)
# loop over the detections
for i in np.arange(0, detections.shape[2]):
    row = detections[0, 0, i]
    # extract the confidence (i.e., probability or cluster score)
    # associated with the prediction
    confidence = row[4] if is_cluster_output else row[2]
    # filter out weak detections by ensuring the `confidence` is
    # greater than the minimum confidence
    if confidence > args["confidence"]:
        if is_cluster_output:
            # NOTE(review): a single output is assumed to belong to the
            # first (only) class in CLASSES -- verify for multi-class nets.
            idx = 0
            box = row[0:4] * cluster_scale
            # The cluster score is a log-weight, not a probability, so it
            # is printed as-is rather than as a percentage.
            label = "{}: {:.2f}".format(CLASSES[idx], confidence)
        else:
            # extract the index of the class label from the `detections`,
            # then compute the (x, y)-coordinates of the bounding box for
            # the object
            idx = int(row[1])
            box = row[3:7] * np.array([w, h, w, h])
            label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
        (startX, startY, endX, endY) = [int(v) for v in box.astype("int")]
        # display the prediction
        print("[INFO] {}".format(label))
        cv2.rectangle(image, (startX, startY), (endX, endY),
            COLORS[idx], 2)
        y = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(image, label, (startX, y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
# show the output image
cv2.imshow("Output", image)
cv2.waitKey(0)
Hello everyone, I'm currently having a problem deploying an object detection model trained with NVIDIA DIGITS 6.0.
NVcaffe version: 0.15.14
The Python code I'm using (adapted from pyimagesearch):
deploy.prototxt:
When I run it, it freezes and returns zeros as coverage, even though the same model works when I test it with "Test One" in DIGITS.