microsoft / AIforEarth-API-Development

This is an API Framework for AI models to be hosted locally or on the AI for Earth API Platform (https://github.com/microsoft/AIforEarth-API-Platform).
MIT License
74 stars 46 forks source link

when there is no detection above the threshold, render boxes errors #59

Closed rbavery closed 4 years ago

rbavery commented 5 years ago

I'm using the tensorflow example to profile why rendering the boxes does not work on my own dataset (which I'll post in a separate issue in case anyone has suggestions). When I ran the suggested ResNet 50 faster RCNN model (http://download.tensorflow.org/models/object_detection/faster_rcnn_resnet50_fgvc_2018_07_19.tar.gz) on this image https://farm3.staticflickr.com/2248/2195772708_716d50d8e9.jpg

I get this traceback because the 5 scores are too love wo be over the .5 threshold. This results in an error because the draw_bounding_boxes_on_image function expects at least one box. A simple fix would be to not call the function if no scores are above the threshold and instead return the original image.

Traceback

render_bounding_boxes(...
(0,)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
 in 
      1 render_bounding_boxes(
----> 2             boxes, scores, clsses, image, confidence_threshold=0.5)

 in render_bounding_boxes(boxes, scores, classes, image, label_map, confidence_threshold)
    110     display_boxes = np.array(display_boxes)
    111     print(display_boxes.shape)
--> 112     draw_bounding_boxes_on_image(image, display_boxes, display_str_list_list=display_strs)
    113 
    114 # the following two functions are from https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py

 in draw_bounding_boxes_on_image(image, boxes, color, thickness, display_str_list_list)
    140     return
    141   if len(boxes_shape) != 2 or boxes_shape[1] != 4:
--> 142     raise ValueError('Input must be of size [N, 4]')
    143   for i in range(boxes_shape[0]):
    144     display_str_list = ()

ValueError: Input must be of size [N, 4]
#%%

import tensorflow as tf
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont

# Core detection functions

def load_model(checkpoint):
    """Load a detection model (i.e., create a graph) from a .pb file.

    Args:
        checkpoint: .pb file of the model.

    Returns: the loaded graph.

    """
    print('tf_detector.py: Loading graph...')
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(checkpoint, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    print('tf_detector.py: Detection graph loaded.')

    return detection_graph

def open_image(image_bytes):
    """ Open an image in binary format using PIL.Image and convert to RGB mode
    Args:
        image_bytes: an image in binary format read from the POST request's body

    Returns:
        an PIL image object in RGB mode
    """
    image = Image.open(image_bytes)
    if image.mode not in ('RGBA', 'RGB'):
        raise AttributeError('Input image not in RGBA or RGB mode and cannot be processed.')
    if image.mode == 'RGBA':
        # Image.convert() returns a converted copy of this image
        image = image.convert(mode='RGB')
    return image

def generate_detections(detection_graph, image):
    """ Generates a set of bounding boxes with confidence and class prediction for one input image file.

    Args:
        detection_graph: an already loaded object detection inference graph.
        image_file: a PIL Image object

    Returns:
        boxes, scores, classes, and the image loaded from the input image_file - for one image
    """
    image_np = np.asarray(image, np.uint8)
    image_np = image_np[:, :, :3] # Remove the alpha channel

    #with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        image_np = np.expand_dims(image_np, axis=0)

        # get the operators
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        box = detection_graph.get_tensor_by_name('detection_boxes:0')
        score = detection_graph.get_tensor_by_name('detection_scores:0')
        clss = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        # performs inference
        (box, score, clss, num_detections) = sess.run(
            [box, score, clss, num_detections],
            feed_dict={image_tensor: image_np})

    return np.squeeze(box), np.squeeze(score), np.squeeze(clss), image  # these are lists of bboxes, scores etc

# Rendering functions

def render_bounding_boxes(boxes, scores, classes, image, label_map={}, confidence_threshold=0.5):
    """Renders bounding boxes, label and confidence on an image if confidence is above the threshold.

    Args:
        boxes, scores, classes:  outputs of generate_detections.
        image: PIL.Image object, output of generate_detections.
        label_map: optional, mapping the numerical label to a string name.
        confidence_threshold: threshold above which the bounding box is rendered.

    image is modified in place!

    """
    display_boxes = []
    display_strs = []  # list of list, one list of strings for each bounding box (to accommodate multiple labels)

    for box, score, clss in zip(boxes, scores, classes):
        if score > confidence_threshold:
            print('Confidence of detection greater than threshold: ', score)
            display_boxes.append(box)
            clss = int(clss)
            label = label_map[clss] if clss in label_map else str(clss)
            displayed_label = '{}: {}%'.format(label, round(100*score))
            display_strs.append([displayed_label])

    display_boxes = np.array(display_boxes)
    print(display_boxes.shape)
    draw_bounding_boxes_on_image(image, display_boxes, display_str_list_list=display_strs)

# the following two functions are from https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py

def draw_bounding_boxes_on_image(image,
                                 boxes,
                                 color='LimeGreen',
                                 thickness=4,
                                 display_str_list_list=()):
  """Draws bounding boxes on image.

  Args:
    image: a PIL.Image object.
    boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax).
           The coordinates are in normalized format between [0, 1].
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list_list: list of list of strings.
                           a list of strings for each bounding box.
                           The reason to pass a list of strings for a
                           bounding box is that it might contain
                           multiple labels.

  Raises:
    ValueError: if boxes is not a [N, 4] array
  """
  boxes_shape = boxes.shape
  if not boxes_shape:
    return
  if len(boxes_shape) != 2 or boxes_shape[1] != 4:
    raise ValueError('Input must be of size [N, 4]')
  for i in range(boxes_shape[0]):
    display_str_list = ()
    if display_str_list_list:
      display_str_list = display_str_list_list[i]
    draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2],
                               boxes[i, 3], color, thickness, display_str_list)

def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=4,
                               display_str_list=(),
                               use_normalized_coordinates=True):
  """Adds a bounding box to an image.

  Bounding box coordinates can be specified in either absolute (pixel) or
  normalized coordinates by setting the use_normalized_coordinates argument.

  Each string in display_str_list is displayed on a separate line above the
  bounding box in black text on a rectangle filled with the input 'color'.
  If the top of the bounding box extends to the edge of the image, the strings
  are displayed below the bounding box.

  Args:
    image: a PIL.Image object.
    ymin: ymin of bounding box.
    xmin: xmin of bounding box.
    ymax: ymax of bounding box.
    xmax: xmax of bounding box.
    color: color to draw bounding box. Default is red.
    thickness: line thickness. Default value is 4.
    display_str_list: list of strings to display in box
                      (each to be shown on its own line).
    use_normalized_coordinates: If True (default), treat coordinates
      ymin, xmin, ymax, xmax as relative to the image.  Otherwise treat
      coordinates as absolute.
  """
  draw = ImageDraw.Draw(image)
  im_width, im_height = image.size
  if use_normalized_coordinates:
    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
  else:
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
  draw.line([(left, top), (left, bottom), (right, bottom),
             (right, top), (left, top)], width=thickness, fill=color)
  try:
    font = ImageFont.truetype('arial.ttf', 24)
  except IOError:
    font = ImageFont.load_default()

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = bottom + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    text_width, text_height = font.getsize(display_str)
    margin = np.ceil(0.05 * text_height)
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                          text_bottom)],
        fill=color)
    draw.text(
        (left + margin, text_bottom - text_height - margin),
        display_str,
        fill='black',
        font=font)
    text_bottom -= text_height - 2 * margin
#%%
model = load_model("./tf_iNat_api/faster_rcnn_resnet50_fgvc_2018_07_19/frozen_inference_graph.pb")

f = open("/home/rave/AIforEarth-API-Development/Examples/tensorflow/2195772708_716d50d8e9.jpg", 'rb')
image = open_image(f)

#%%
boxes, scores, clsses, image = generate_detections(
            model, image)

#%%
render_bounding_boxes(
            boxes, scores, clsses, image, confidence_threshold=0.5)