Prerequisites

I am using this model to detect objects on NPU hardware, but the problem is the model's output: I am unable to get the boxes and class labels from it. Please answer the following questions for yourself before submitting an issue.

[Yes ] I am using the latest TensorFlow Model Garden release and TensorFlow 2.
[ Yes] I am reporting the issue to the correct repository. (Model Garden official or research directory)
[ Yes] I checked to make sure that this issue has not been filed already.

1. The entire URL of the file you are using

https://tfhub.dev/iree/lite-model/ssd_mobilenet_v1_100_320/uint8/default/1

2. Describe the bug

The output of the model is not understandable; the model with NMS works fine with the code below. A clear and concise description of what the bug is.

The only model that works with my code is this one https://tfhub.dev/iree/lite-model/ssd_mobilenet_v1_100_320/uint8/nms/1

Since it is not optimised for int8 input and int8 output.

Could someone please help me understand how to get the output points

box_predictor_output = interpreter.get_tensor(output_details[0]['index']) class_predictor_output = interpreter.get_tensor(output_details[1]['index'])

from this model https://tfhub.dev/iree/lite-model/ssd_mobilenet_v1_100_320/uint8/default/1


# Load the model
import tflite_runtime.interpreter as tflite
# import tensorflow as tf
import numpy as np
import cv2
image_name="images.jpg"
# image_name="download.jpg"
def load_labels(filename):
  """Return the lines of ``filename`` with surrounding whitespace stripped.

  Every line of the file yields one list entry, so a blank line becomes an
  empty string and list positions match line positions in the file.
  """
  with open(filename, 'r') as label_file:
    cleaned = []
    for raw_line in label_file:
      cleaned.append(raw_line.strip())
  return cleaned
def draw_boxes_on_image(image, output_dict, labels, min_score=0.5):
    """Draw every detection scoring above ``min_score`` on a 500x500 canvas.

    NOTE(review): the ``image`` argument is not used as the canvas. The picture
    is re-read from the module-level ``image_name`` path and resized to
    500x500 for display, so the boxes are drawn on the array exactly as
    ``cv2.imread`` returns it rather than on the array handed in by the caller.
    The parameter is kept so existing call sites keep working.

    Args:
        image: Currently ignored (see note above).
        output_dict: Mapping with an integer ``'num_detections'`` and the
            arrays ``'detection_classes'``, ``'detection_scores'`` and
            ``'detection_boxes'``, each indexed as ``[0, i]``. A box is
            unpacked as ``(ymin, xmin, ymax, xmax)`` and multiplied by the
            canvas size, i.e. it is treated as normalized coordinates.
        labels: Sequence indexed by class id giving the display name.
        min_score: Detections whose score is not strictly greater than this
            value are skipped.

    Returns:
        The 500x500 image with boxes and captions drawn on it.

    Raises:
        FileNotFoundError: If ``image_name`` cannot be read by OpenCV.
    """
    canvas = cv2.imread(image_name)  # Replace with the path to your input image
    if canvas is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_name}")

    # Fixed display size; this is independent of the model's input size.
    canvas = cv2.resize(canvas, (500, 500))
    height, width, _ = canvas.shape

    # Drawing style is the same for every detection.
    color = (0, 255, 0)  # Green
    thickness = 2
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    font_thickness = 1

    for i in range(output_dict['num_detections']):
        score = output_dict['detection_scores'][0, i]
        if not score > min_score:
            continue

        # Look the label up only for detections that are actually drawn, so a
        # low-confidence entry with an out-of-range class id cannot raise.
        class_id = int(output_dict['detection_classes'][0, i])
        class_name = labels[class_id]

        ymin, xmin, ymax, xmax = output_dict['detection_boxes'][0, i]

        # Convert normalized coordinates to pixel values
        xmin = int(xmin * width)
        xmax = int(xmax * width)
        ymin = int(ymin * height)
        ymax = int(ymax * height)

        # Draw bounding box
        cv2.rectangle(canvas, (xmin, ymin), (xmax, ymax), color, thickness)

        # Label with class name and score on a filled background strip
        label = f'{class_name} ({score:.2f})'
        text_size, _ = cv2.getTextSize(label, font, font_scale, font_thickness)
        cv2.rectangle(canvas, (xmin, ymin), (xmin + text_size[0], ymin - text_size[1] - 2), color, thickness=cv2.FILLED)
        cv2.putText(canvas, label, (xmin, ymin - 2), font, font_scale, (0, 0, 0), font_thickness, lineType=cv2.LINE_AA)

    return canvas
def draw_all_boxes_on_image(image, output_dict, labels):
    """Draw every detection, regardless of score, on a 500x500 copy of ``image``.

    Args:
        image: Image array to draw on. It is resized to 500x500 first; the
            drawing happens on the resized array, which is what gets returned.
        output_dict: Mapping with an integer ``'num_detections'`` and the
            arrays ``'detection_classes'``, ``'detection_scores'`` and
            ``'detection_boxes'``, each indexed as ``[0, i]``. A box is
            unpacked as ``(ymin, xmin, ymax, xmax)`` and multiplied by the
            image size, i.e. it is treated as normalized coordinates.
        labels: Sequence indexed by class id giving the display name.

    Returns:
        The 500x500 image with all boxes and captions drawn on it. The image
        is resized even when there are no detections.
    """
    # Resize once up front: doing it inside the loop repeated the same work
    # for every detection and skipped it entirely when nothing was detected.
    image = cv2.resize(image, (500, 500))
    height, width, _ = image.shape

    # Drawing style is the same for every detection.
    color = (0, 255, 0)  # Green
    thickness = 2
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.2
    font_thickness = 1

    for i in range(output_dict['num_detections']):
        class_id = int(output_dict['detection_classes'][0, i])
        class_name = labels[class_id]
        score = output_dict['detection_scores'][0, i]
        ymin, xmin, ymax, xmax = output_dict['detection_boxes'][0, i]

        # Convert normalized coordinates to pixel values
        xmin = int(xmin * width)
        xmax = int(xmax * width)
        ymin = int(ymin * height)
        ymax = int(ymax * height)

        # Draw bounding box
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)

        # Label with class name and score on a filled background strip
        label = f'{class_name} ({score:.2f})'
        text_size, _ = cv2.getTextSize(label, font, font_scale, font_thickness)
        cv2.rectangle(image, (xmin, ymin), (xmin + text_size[0], ymin - text_size[1] - 2), color, thickness=cv2.FILLED)
        cv2.putText(image, label, (xmin, ymin - 2), font, font_scale, (0, 0, 0), font_thickness, lineType=cv2.LINE_AA)

    return image

# Load the TFLite model and allocate its tensors.
interpreter = tflite.Interpreter(model_path='lite-model_ssd_mobilenet_v1_100_320_uint8_nms_1.tflite')
interpreter.allocate_tensors()

# Get the input tensor description.
input_details = interpreter.get_input_details()

# Extract height and width from input details (shape indexed as
# batch, height, width, ...).
input_height = input_details[0]['shape'][1]
input_width = input_details[0]['shape'][2]

labels = load_labels("coco_labels.txt")  # Replace with the actual label file path

# Load and preprocess the input image
input_image = cv2.imread(image_name)  # Replace with the path to your input image
if input_image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {image_name}")
input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
# Match the spatial size reported by the model's input tensor.
input_image = cv2.resize(input_image, (input_width, input_height))

# Ensure input values are in the range [0, 255]
input_image = np.clip(input_image, 0, 255).astype(np.uint8)

# Add the leading batch dimension.
input_image = np.expand_dims(input_image, axis=0)

print(input_image.shape)

# The input data's shape should match input_details[0]['shape'], which is
# BATCH_SIZE x HEIGHT x WIDTH x CHANNELS
interpreter.set_tensor(input_details[0]['index'], input_image)

interpreter.invoke()

output_details = interpreter.get_output_details()
# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
# Output order used here: 0 = boxes, 1 = classes, 2 = scores, 3 = detection count.
num_detections_tensor = interpreter.get_tensor(output_details[3]["index"])
output_dict = {
    # `.item()` pulls the single element out explicitly; calling int() on an
    # array with ndim > 0 is deprecated in recent NumPy releases.
    'num_detections': int(num_detections_tensor.item()),
    'detection_classes': interpreter.get_tensor(output_details[1]["index"]).astype(np.uint8),
    'detection_boxes' : interpreter.get_tensor(output_details[0]["index"]),
    'detection_scores' : interpreter.get_tensor(output_details[2]["index"])
    }

# Variant tried for the model without NMS, which exposes only two outputs:
# output_dict = {
#     'detection_classes': interpreter.get_tensor(output_details[1]["index"]).astype(np.uint8),
#     'detection_boxes' : interpreter.get_tensor(output_details[0]["index"])
#     }
print(output_dict)
# Get the output tensors
# box_predictor_output = interpreter.get_tensor(output_details[0]['index'])
# class_predictor_output = interpreter.get_tensor(output_details[1]['index'])

# # You may need to reshape the outputs to match the expected shape
# # The shape should be [batch, height, width, box_encoding * num_anchors] for box predictor
# # and [batch, height, width, num_classes * num_anchors] for class predictor
# batch_size, height, width, _ = box_predictor_output.shape  # Extract height and width
# print(batch_size,height ,width  )
# # The following code is assuming a specific arrangement of data
# # If your model's data layout is different, adjust accordingly

# # Reshape the box predictor output
# box_encoding_size = 4  # You mentioned [y_center, x_center, box_height, box_width]
# num_anchors = box_predictor_output.shape[-1] // box_encoding_size
# box_predictor_output = box_predictor_output.reshape((batch_size, height, width, box_encoding_size, num_anchors))

# # Reshape the class predictor output
# num_classes = 91  # Number of classes, including the background class
# num_anchors = class_predictor_output.shape[-1] // num_classes
# class_predictor_output = class_predictor_output.reshape((batch_size, height, width, num_classes, num_anchors))

# print(box_predictor_output)
# print(class_predictor_output)

# Draw the detections above the default score threshold.
# NOTE: draw_boxes_on_image does not draw on the array passed as its first
# argument; it re-reads `image_name` itself and returns a 500x500 image.
output_image = draw_boxes_on_image(input_image[0], output_dict, labels)

# Display the image with the detected bounding boxes
cv2.imshow("Object Detection", output_image)
# # Load and preprocess the input image
# # input_image = cv2.imread("your_input_image.jpg")  # Replace with the path to your input image
# # input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

# # Loop through each anchor box and draw it on the image
# input_image = cv2.imread(image_name)  # Replace with the path to your input image
# input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

# for i in range(num_anchors):
#     box_coordinates = box_predictor_output[0, :, :, :, i]
#     class_scores = class_predictor_output[0, :, :, :, i]

#     # Threshold the class scores to identify objects
#     class_threshold = 0.5  # Adjust this threshold as needed
#     object_indices = np.where(class_scores > class_threshold)

#     for index in range(len(object_indices[0])):
#         y, x = object_indices[0][index], object_indices[1][index]
#         box = box_coordinates[y, x]

#         ymin, xmin, ymax, xmax = box
#         height, width, _ = input_image.shape
#         # print(height)

#         # Convert normalized coordinates to pixel values
#         xmin = int(xmin * width)
#         xmax = int(xmax * width)
#         ymin = int(ymin * height)
#         ymax = int(ymax * height)

#         # Draw bounding box
#         color = (0, 255, 0)  # Green
#         thickness = 2
#         cv2.rectangle(input_image, (xmin, ymin), (xmax, ymax), color, thickness)

#         # Get the class label
#         class_id = np.argmax(class_scores[y, x])
#         class_name = labels[class_id]

#         # Label with class name and score
#         label = f'{class_name} ({class_scores[y, x, class_id]:.2f})'
#         font = cv2.FONT_HERSHEY_SIMPLEX
#         font_scale = 0.6
#         font_thickness = 1
#         text_size, _ = cv2.getTextSize(label, font, font_scale, font_thickness)
#         cv2.rectangle(input_image, (xmin, ymin), (xmin + text_size[0], ymin - text_size[1] - 2), color, thickness=cv2.FILLED)
#         cv2.putText(input_image, label, (xmin, ymin - 2), font, font_scale, (0, 0, 0), font_thickness, lineType=cv2.LINE_AA)

# Display the image with bounding boxes and labels, then wait for a key press
# so the window stays open.
cv2.imshow("Object Detection", output_image)
cv2.waitKey(0)

tensorflow / models

Trying to use this model https://tfhub.dev/iree/lite-model/ssd_mobilenet_v1_100_320/uint8/default/1 #11104

Prerequisites

1. The entire URL of the file you are using

2. Describe the bug