This is an API Framework for AI models to be hosted locally or on the AI for Earth API Platform (https://github.com/microsoft/AIforEarth-API-Platform).
MIT License
74
stars
46
forks
source link
when there is no detection above the threshold, render boxes errors #59
I get this traceback because the 5 scores are too love wo be over the .5 threshold. This results in an error because the draw_bounding_boxes_on_image function expects at least one box. A simple fix would be to not call the function if no scores are above the threshold and instead return the original image.
Traceback
render_bounding_boxes(...
(0,)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
1 render_bounding_boxes(
----> 2 boxes, scores, clsses, image, confidence_threshold=0.5)
in render_bounding_boxes(boxes, scores, classes, image, label_map, confidence_threshold)
110 display_boxes = np.array(display_boxes)
111 print(display_boxes.shape)
--> 112 draw_bounding_boxes_on_image(image, display_boxes, display_str_list_list=display_strs)
113
114 # the following two functions are from https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py
in draw_bounding_boxes_on_image(image, boxes, color, thickness, display_str_list_list)
140 return
141 if len(boxes_shape) != 2 or boxes_shape[1] != 4:
--> 142 raise ValueError('Input must be of size [N, 4]')
143 for i in range(boxes_shape[0]):
144 display_str_list = ()
ValueError: Input must be of size [N, 4]
#%%
import tensorflow as tf
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
# Core detection functions
def load_model(checkpoint):
"""Load a detection model (i.e., create a graph) from a .pb file.
Args:
checkpoint: .pb file of the model.
Returns: the loaded graph.
"""
print('tf_detector.py: Loading graph...')
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(checkpoint, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
print('tf_detector.py: Detection graph loaded.')
return detection_graph
def open_image(image_bytes):
""" Open an image in binary format using PIL.Image and convert to RGB mode
Args:
image_bytes: an image in binary format read from the POST request's body
Returns:
an PIL image object in RGB mode
"""
image = Image.open(image_bytes)
if image.mode not in ('RGBA', 'RGB'):
raise AttributeError('Input image not in RGBA or RGB mode and cannot be processed.')
if image.mode == 'RGBA':
# Image.convert() returns a converted copy of this image
image = image.convert(mode='RGB')
return image
def generate_detections(detection_graph, image):
""" Generates a set of bounding boxes with confidence and class prediction for one input image file.
Args:
detection_graph: an already loaded object detection inference graph.
image_file: a PIL Image object
Returns:
boxes, scores, classes, and the image loaded from the input image_file - for one image
"""
image_np = np.asarray(image, np.uint8)
image_np = image_np[:, :, :3] # Remove the alpha channel
#with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
image_np = np.expand_dims(image_np, axis=0)
# get the operators
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
box = detection_graph.get_tensor_by_name('detection_boxes:0')
score = detection_graph.get_tensor_by_name('detection_scores:0')
clss = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# performs inference
(box, score, clss, num_detections) = sess.run(
[box, score, clss, num_detections],
feed_dict={image_tensor: image_np})
return np.squeeze(box), np.squeeze(score), np.squeeze(clss), image # these are lists of bboxes, scores etc
# Rendering functions
def render_bounding_boxes(boxes, scores, classes, image, label_map={}, confidence_threshold=0.5):
"""Renders bounding boxes, label and confidence on an image if confidence is above the threshold.
Args:
boxes, scores, classes: outputs of generate_detections.
image: PIL.Image object, output of generate_detections.
label_map: optional, mapping the numerical label to a string name.
confidence_threshold: threshold above which the bounding box is rendered.
image is modified in place!
"""
display_boxes = []
display_strs = [] # list of list, one list of strings for each bounding box (to accommodate multiple labels)
for box, score, clss in zip(boxes, scores, classes):
if score > confidence_threshold:
print('Confidence of detection greater than threshold: ', score)
display_boxes.append(box)
clss = int(clss)
label = label_map[clss] if clss in label_map else str(clss)
displayed_label = '{}: {}%'.format(label, round(100*score))
display_strs.append([displayed_label])
display_boxes = np.array(display_boxes)
print(display_boxes.shape)
draw_bounding_boxes_on_image(image, display_boxes, display_str_list_list=display_strs)
# the following two functions are from https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py
def draw_bounding_boxes_on_image(image,
boxes,
color='LimeGreen',
thickness=4,
display_str_list_list=()):
"""Draws bounding boxes on image.
Args:
image: a PIL.Image object.
boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
The coordinates are in normalized format between [0, 1].
color: color to draw bounding box. Default is red.
thickness: line thickness. Default value is 4.
display_str_list_list: list of list of strings.
a list of strings for each bounding box.
The reason to pass a list of strings for a
bounding box is that it might contain
multiple labels.
Raises:
ValueError: if boxes is not a [N, 4] array
"""
boxes_shape = boxes.shape
if not boxes_shape:
return
if len(boxes_shape) != 2 or boxes_shape[1] != 4:
raise ValueError('Input must be of size [N, 4]')
for i in range(boxes_shape[0]):
display_str_list = ()
if display_str_list_list:
display_str_list = display_str_list_list[i]
draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2],
boxes[i, 3], color, thickness, display_str_list)
def draw_bounding_box_on_image(image,
ymin,
xmin,
ymax,
xmax,
color='red',
thickness=4,
display_str_list=(),
use_normalized_coordinates=True):
"""Adds a bounding box to an image.
Bounding box coordinates can be specified in either absolute (pixel) or
normalized coordinates by setting the use_normalized_coordinates argument.
Each string in display_str_list is displayed on a separate line above the
bounding box in black text on a rectangle filled with the input 'color'.
If the top of the bounding box extends to the edge of the image, the strings
are displayed below the bounding box.
Args:
image: a PIL.Image object.
ymin: ymin of bounding box.
xmin: xmin of bounding box.
ymax: ymax of bounding box.
xmax: xmax of bounding box.
color: color to draw bounding box. Default is red.
thickness: line thickness. Default value is 4.
display_str_list: list of strings to display in box
(each to be shown on its own line).
use_normalized_coordinates: If True (default), treat coordinates
ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
coordinates as absolute.
"""
draw = ImageDraw.Draw(image)
im_width, im_height = image.size
if use_normalized_coordinates:
(left, right, top, bottom) = (xmin * im_width, xmax * im_width,
ymin * im_height, ymax * im_height)
else:
(left, right, top, bottom) = (xmin, xmax, ymin, ymax)
draw.line([(left, top), (left, bottom), (right, bottom),
(right, top), (left, top)], width=thickness, fill=color)
try:
font = ImageFont.truetype('arial.ttf', 24)
except IOError:
font = ImageFont.load_default()
# If the total height of the display strings added to the top of the bounding
# box exceeds the top of the image, stack the strings below the bounding box
# instead of above.
display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
# Each display_str has a top and bottom margin of 0.05x.
total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
if top > total_display_str_height:
text_bottom = top
else:
text_bottom = bottom + total_display_str_height
# Reverse list and print from bottom to top.
for display_str in display_str_list[::-1]:
text_width, text_height = font.getsize(display_str)
margin = np.ceil(0.05 * text_height)
draw.rectangle(
[(left, text_bottom - text_height - 2 * margin), (left + text_width,
text_bottom)],
fill=color)
draw.text(
(left + margin, text_bottom - text_height - margin),
display_str,
fill='black',
font=font)
text_bottom -= text_height - 2 * margin
#%%
model = load_model("./tf_iNat_api/faster_rcnn_resnet50_fgvc_2018_07_19/frozen_inference_graph.pb")
f = open("/home/rave/AIforEarth-API-Development/Examples/tensorflow/2195772708_716d50d8e9.jpg", 'rb')
image = open_image(f)
#%%
boxes, scores, clsses, image = generate_detections(
model, image)
#%%
render_bounding_boxes(
boxes, scores, clsses, image, confidence_threshold=0.5)
I'm using the tensorflow example to profile why rendering the boxes does not work on my own dataset (which I'll post in a separate issue in case anyone has suggestions). When I ran the suggested ResNet 50 faster RCNN model (http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) on this image https://farm3.staticflickr.com/2248/2195772708_716d50d8e9.jpg
I get this traceback because the 5 scores are too love wo be over the .5 threshold. This results in an error because the
draw_bounding_boxes_on_image
function expects at least one box. A simple fix would be to not call the function if no scores are above the threshold and instead return the original image.Traceback