No object detection after exporting the graph

anshkumar commented 5 years ago

I've trained the ssd_mobilenet_v2_coco for my custom dataset, but the problem is that during the training tensorbaord shows detection in the image. It's something like this:

Screen Shot 2019-08-21 at 8 12 19 PM

But when I export the graph and do inference I'm not detecting anything in the image (same image as training). For exporting the graph I'm using this script. The exporter generates the following files:

-rw-r--r-- 1 deploy deploy 77 Aug 22 12:56 checkpoint -rw-r--r-- 1 deploy deploy 19345027 Aug 22 12:56 frozen_inference_graph.pb -rw-r--r-- 1 deploy deploy 2261052 Aug 22 12:56 inference_graph.pbtxt -rw-r--r-- 1 deploy deploy 18828288 Aug 22 12:56 model.ckpt.data-00000-of-00001 -rw-r--r-- 1 deploy deploy 14126 Aug 22 12:56 model.ckpt.index -rw-r--r-- 1 deploy deploy 1305799 Aug 22 12:56 model.ckpt.meta -rw-r--r-- 1 deploy deploy 4390 Aug 22 12:56 pipeline.config drwxr-xr-x 3 deploy deploy 4096 Aug 22 12:56 saved_model

When I use the frozen_inference_graph.pb for inference I'm not getting detection in the images (even the same images which I used during training ). The code which I used for inference is this:

import tensorflow as tf
import numpy as np
import cv2

def get_frozen_graph(graph_file):
    """Read Frozen Graph file from disk."""
    with tf.gfile.FastGFile(graph_file, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    return graph_def

pb_fname = "/Users/vedanshu/frozen_inference_graph.pb"
trt_graph = get_frozen_graph(pb_fname)

input_names = ['image_tensor']

# Create session and load graph

tf_sess = tf.Session()
tf.import_graph_def(trt_graph, name='')

tf_input = tf_sess.graph.get_tensor_by_name(input_names[0] + ':0')
tf_scores = tf_sess.graph.get_tensor_by_name('detection_scores:0')
tf_boxes = tf_sess.graph.get_tensor_by_name('detection_boxes:0')
tf_classes = tf_sess.graph.get_tensor_by_name('detection_classes:0')
tf_num_detections = tf_sess.graph.get_tensor_by_name('num_detections:0')

IMAGE_PATH = "/Users/vedanshu/test.jpg"
image = cv2.imread(IMAGE_PATH)

scores, boxes, classes, num_detections = tf_sess.run([tf_scores, tf_boxes, tf_classes, tf_num_detections], feed_dict={
    tf_input: image[None, ...]
})
boxes = boxes[0]  # index by 0 to remove batch dimension
scores = scores[0]
classes = classes[0]
num_detections = int(num_detections[0])

# Boxes unit in pixels (image coordinates).
boxes_pixels = []
for i in range(num_detections):
    # scale box to image coordinates
    box = boxes[i] * np.array([image.shape[0],
                               image.shape[1], image.shape[0], image.shape[1]])
    box = np.round(box).astype(int)
    boxes_pixels.append(box)
boxes_pixels = np.array(boxes_pixels)

def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=0.5, thickness=2):
    size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    x, y = point
    cv2.rectangle(image, (x, y - size[1]),
                  (x + size[0], y), (255, 0, 0), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale,
                (255, 255, 255), thickness)

for i in range(num_detections):
    if scores[i] > 0.05:
        box = boxes_pixels[i]
        box = np.round(box).astype(int)
        # Draw bounding box.
        image = cv2.rectangle(
            image, (box[1], box[0]), (box[3], box[2]), (0, 255, 0), 2)
        label = "{}:{:.2f}".format(int(classes[i]), scores[i])
        # Draw label (class index and probability).
        draw_label(image, (box[1], box[0]), label)

# Save and display the labeled image.
cv2.imwrite("/Users/vedanshu/out.jpg", image)

The scores after running inference for a sample image is as follows:

In [1]: scores                                                                                                                    
Out[1]: 
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)

But instead of using frozen_inference_graph.pb if I use saved_model/saved_model.pb with the following code I'm getting all the detections:

import tensorflow as tf
import numpy as np
from IPython import embed
import cv2

predict_fn = tf.contrib.predictor.from_saved_model("/Users/vedanshu/ckpt/saved_model/")

IMAGE_PATH = "/Users/vedanshu/test.jpg"
img = cv2.imread(IMAGE_PATH)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_rgb = np.expand_dims(img, 0)

output_data = predict_fn({"inputs": img_rgb})

scores = output_data['detection_scores']
boxes = output_data['detection_boxes']
classes = output_data['detection_classes']
num_detections = output_data['num_detections']

boxes = boxes[0]  # index by 0 to remove batch dimension
scores = scores[0]
classes = classes[0]
num_detections = int(num_detections[0])

# Boxes unit in pixels (image coordinates).
boxes_pixels = []
for i in range(num_detections):
    # scale box to image coordinates
    box = boxes[i] * np.array([img.shape[0],
                               img.shape[1], img.shape[0], img.shape[1]])
    box = np.round(box).astype(int)
    boxes_pixels.append(box)
boxes_pixels = np.array(boxes_pixels)

def draw_label(image, point, label, font=cv2.FONT_HERSHEY_SIMPLEX,
               font_scale=0.5, thickness=2):
    size = cv2.getTextSize(label, font, font_scale, thickness)[0]
    x, y = point
    cv2.rectangle(image, (x, y - size[1]),
                  (x + size[0], y), (255, 0, 0), cv2.FILLED)
    cv2.putText(image, label, point, font, font_scale,
                (255, 255, 255), thickness)

# for i in pick:
for i in range(num_detections):
    if scores[i] > 0.05:
        box = boxes_pixels[i]
        box = np.round(box).astype(int)
        # Draw bounding box.
        image = cv2.rectangle(
            img, (box[1], box[0]), (box[3], box[2]), (0, 255, 0), 2)
        label = "{}:{:.2f}".format(int(classes[i]), scores[i])
        # Draw label (class index and probability).
        draw_label(image, (box[1], box[0]), label)

# Save and display the labeled image.
image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
cv2.imwrite("/Users/vedanshu/out.jpg", image)

The scores after running inference is now:

In [1]: scores                                                                                                                    
Out[1]: 
array([9.90021527e-01, 9.78490651e-01, 9.64503825e-01, 9.53396082e-01,
       9.14436996e-01, 9.03423905e-01, 8.03212464e-01, 6.11247838e-01,
       ....

       1.02795864e-04, 1.02072328e-04, 1.01457423e-04, 1.00879886e-04],
      dtype=float32)

I don't know why is it happening, but using saved_model/saved_model.pb has reduced the inference speed but accuracy is good. Also, I want to export the model to TensorRT model. But when using saved_model/saved_model.pb with the tf.contrib.predictor.from_saved_model(), I'm simply calling the predictor function; I don't know how to export it to RT graph then.

tensorflowbutler commented 5 years ago

Thank you for your post. We noticed you have not filled out the following field in the issue template. Could you update them if they are relevant in your case, or leave them as N/A? Thanks. What is the top-level directory of the model you are using Have I written custom code OS Platform and Distribution TensorFlow installed from TensorFlow version Bazel version CUDA/cuDNN version GPU model and memory Exact command to reproduce

anshkumar commented 5 years ago

OS: "Debian GNU/Linux 9 (stretch)" TensorFlow version: '1.13.1' TensorFlow installed from: pip3 CUDA Version: 10.0 GPU model: Tesla K80 GPU memory: 11441MiB Exact command to reproduce: python export_inference_graph \ --input_type image_tensor \ --pipeline_config_path path/to/ssd_inception_v2.config \ --trained_checkpoint_prefix path/to/model.ckpt \ --output_directory path/to/exported_model_directory

lorienberne commented 5 years ago

Hello,

I had this problem. Training seems to work with RGB and opencv seems to open images in BGR.

Adding this solved the problem for me.

image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

Hope it helps!

gurucharansrinivas commented 4 years ago

Did you find out any solution, I face the simillar issue

lorienberne commented 4 years ago

It turned out the model was expecting RGB and opencv grab the frames from my camera in BGR. Hope this helps.

Best regards, Lorién Berné Flight Labs +34 659061852 www.flightlabs.es lorienberne@flightlabs.es https://www.facebook.com/flightlabs

El vie., 10 ene. 2020 a las 10:01, gurucharansrinivas (< notifications@github.com>) escribió:

Did you find out any solution, I face the simillar issue

— You are receiving this because you commented. Reply to this email directly, view it on GitHub https://github.com/tensorflow/models/issues/7482?email_source=notifications&email_token=ACAOYXZKYCEXGQQFV24XJMLQ5A2QDA5CNFSM4IOIGMIKYY3PNVWWK3TUL52HS4DFVREXG43VMVBW63LNMVXHJKTDN5WW2ZLOORPWSZGOEITFT4Q#issuecomment-572938738, or unsubscribe https://github.com/notifications/unsubscribe-auth/ACAOYX5XQGBIX3UYKSWTZDDQ5A2QDANCNFSM4IOIGMIA .

hongo2 commented 4 years ago

any news on this having the same issue, works well on eval, model_main_tf2.py but when using exporter_main_tf2.py the resultant saved_model does not perform

tensorflow / models

No object detection after exporting the graph #7482