tensorflow / models

Models and examples built with TensorFlow
Other
76.79k stars 45.84k forks source link

Trying to use this model https://tfhub.dev/iree/lite-model/ssd_mobilenet_v1_100_320/uint8/default/1 #11104

Open dev-techshlok opened 8 months ago

dev-techshlok commented 8 months ago

Prerequisites

I am using this model to detect objects on NPU hardware, but I am unable to get the boxes and class labels from the model's output. Please answer the following questions for yourself before submitting an issue.

1. The entire URL of the file you are using

https://tfhub.dev/iree/lite-model/ssd_mobilenet_v1_100_320/uint8/default/1

2. Describe the bug

The output of the model is not understandable; the model with NMS works fine with the code below. A clear and concise description of what the bug is.

The only model that works with my code is this one https://tfhub.dev/iree/lite-model/ssd_mobilenet_v1_100_320/uint8/nms/1

However, it is not optimised for int8 input and int8 output.

Could someone please help me understand how to get the output points

box_predictor_output = interpreter.get_tensor(output_details[0]['index'])
class_predictor_output = interpreter.get_tensor(output_details[1]['index'])

from this model https://tfhub.dev/iree/lite-model/ssd_mobilenet_v1_100_320/uint8/default/1


# Load the model
import tflite_runtime.interpreter as tflite
# import tensorflow as tf
import numpy as np
import cv2
# Path of the image to run detection on; read by the preprocessing step below
# and again inside draw_boxes_on_image.
image_name="images.jpg"
# image_name="download.jpg"
def load_labels(filename):
    """Return the lines of ``filename`` as a list of whitespace-stripped strings.

    Blank lines are kept (as empty strings), so a label's position in the
    returned list always equals its line index in the file.
    """
    stripped_lines = []
    with open(filename, 'r') as label_file:
        for raw_line in label_file:
            stripped_lines.append(raw_line.strip())
    return stripped_lines
def draw_boxes_on_image(image, output_dict, labels, min_score=0.5):
    """Draw a labelled box for every detection scoring above ``min_score``.

    NOTE: the ``image`` argument is accepted but not used as the canvas. The
    picture is re-read from the module-level ``image_name`` path with
    ``cv2.imread`` and resized to 500x500; boxes are drawn on that copy.

    Args:
        image: Ignored; see the note above.
        output_dict: Mapping with an integer 'num_detections' and the arrays
            'detection_classes', 'detection_scores' and 'detection_boxes',
            each indexed as ``[0, i]``. A box is unpacked as
            (ymin, xmin, ymax, xmax) and scaled by the canvas size, i.e. it is
            treated as normalized coordinates.
        labels: Sequence of class names indexed by class id.
        min_score: Only detections whose score is greater than this are drawn.

    Returns:
        The 500x500 image with boxes and "<name> (<score>)" captions drawn.

    Raises:
        FileNotFoundError: If ``image_name`` cannot be read by OpenCV.
    """
    source = cv2.imread(image_name)  # Replace with the path to your input image
    if source is None:
        # cv2.imread reports an unreadable file by returning None, which would
        # otherwise surface as an opaque error inside cv2.resize.
        raise FileNotFoundError(f'Could not read image: {image_name}')

    # Fixed 500x500 display canvas (not the model's input size).
    image = cv2.resize(source, (500, 500))
    height, width, _ = image.shape

    # Drawing constants, identical for every detection.
    color = (0, 255, 0)  # Green
    thickness = 2
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    font_thickness = 1

    for i in range(output_dict['num_detections']):
        score = output_dict['detection_scores'][0, i]
        if not score > min_score:
            continue

        # Resolve the class name only for detections that are actually drawn,
        # so a bad class id on a discarded detection cannot abort the loop.
        class_id = int(output_dict['detection_classes'][0, i])
        if 0 <= class_id < len(labels):
            class_name = labels[class_id]
        else:
            class_name = f'id {class_id}'

        ymin, xmin, ymax, xmax = output_dict['detection_boxes'][0, i]

        # Convert normalized coordinates to pixel values
        xmin = int(xmin * width)
        xmax = int(xmax * width)
        ymin = int(ymin * height)
        ymax = int(ymax * height)

        # Draw bounding box
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)

        # Label with class name and score on a filled background above the box
        label = f'{class_name} ({score:.2f})'
        text_size, _ = cv2.getTextSize(label, font, font_scale, font_thickness)
        cv2.rectangle(image, (xmin, ymin), (xmin + text_size[0], ymin - text_size[1] - 2), color, thickness=cv2.FILLED)
        cv2.putText(image, label, (xmin, ymin - 2), font, font_scale, (0, 0, 0), font_thickness, lineType=cv2.LINE_AA)

    return image
def draw_all_boxes_on_image(image, output_dict, labels):
    """Draw a labelled box for every detection, regardless of its score.

    Args:
        image: Image array to draw on; it is resized to 500x500 first.
        output_dict: Mapping with an integer 'num_detections' and the arrays
            'detection_classes', 'detection_scores' and 'detection_boxes',
            each indexed as ``[0, i]``. A box is unpacked as
            (ymin, xmin, ymax, xmax) and scaled by the canvas size, i.e. it is
            treated as normalized coordinates.
        labels: Sequence of class names indexed by class id.

    Returns:
        The 500x500 image with boxes and "<name> (<score>)" captions drawn.
        The resize happens once up front, so the result is 500x500 even when
        there are no detections.
    """
    # Fixed 500x500 display canvas (not the model's input size). Resizing is
    # loop-invariant, so it is done once instead of on every iteration.
    image = cv2.resize(image, (500, 500))
    height, width, _ = image.shape

    # Drawing constants, identical for every detection.
    color = (0, 255, 0)  # Green
    thickness = 2
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.2
    font_thickness = 1

    for i in range(output_dict['num_detections']):
        class_id = int(output_dict['detection_classes'][0, i])
        class_name = labels[class_id]
        score = output_dict['detection_scores'][0, i]
        ymin, xmin, ymax, xmax = output_dict['detection_boxes'][0, i]

        # Convert normalized coordinates to pixel values
        xmin = int(xmin * width)
        xmax = int(xmax * width)
        ymin = int(ymin * height)
        ymax = int(ymax * height)

        # Draw bounding box
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)

        # Label with class name and score on a filled background above the box
        label = f'{class_name} ({score:.2f})'
        text_size, _ = cv2.getTextSize(label, font, font_scale, font_thickness)
        cv2.rectangle(image, (xmin, ymin), (xmin + text_size[0], ymin - text_size[1] - 2), color, thickness=cv2.FILLED)
        cv2.putText(image, label, (xmin, ymin - 2), font, font_scale, (0, 0, 0), font_thickness, lineType=cv2.LINE_AA)

    return image

# Load the TFLite model and allocate its tensors.
interpreter = tflite.Interpreter(model_path='lite-model_ssd_mobilenet_v1_100_320_uint8_nms_1.tflite')
interpreter.allocate_tensors()

# Get input and output tensor metadata (queried once each).
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Extract height and width from the input shape (positions 1 and 2).
input_height = input_details[0]['shape'][1]
input_width = input_details[0]['shape'][2]

labels = load_labels("coco_labels.txt")  # Replace with the actual label file path

# Load and preprocess the input image
input_image = cv2.imread(image_name)  # Replace with the path to your input image
if input_image is None:
    # cv2.imread returns None instead of raising when the file cannot be read;
    # fail here with a clear message rather than inside cv2.cvtColor.
    raise FileNotFoundError(f"Could not read image: {image_name}")
input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)
# Match the model's input size as reported by the interpreter.
input_image = cv2.resize(input_image, (input_width, input_height))

# Ensure input values are uint8 in the range [0, 255]
input_image = np.clip(input_image, 0, 255).astype(np.uint8)

# Add the leading batch dimension.
input_image = np.expand_dims(input_image, axis=0)

print(input_image.shape)

# The input data's shape should match input_details[0]['shape'], which is
# BATCH_SIZE x HEIGHT x WIDTH x CHANNELS.
interpreter.set_tensor(input_details[0]['index'], input_image)

interpreter.invoke()

output_details = interpreter.get_output_details()
# print(output_details[0])
# out_details_1 = output_details[0]
# output_tensors = [interpreter.get_tensor(out_details_1['index']) for output_detail in out_details_1]
# print(output_tensors)
# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
# NOTE(review): this assumes the output order 0 = boxes, 1 = classes,
# 2 = scores, 3 = detection count; confirm against output_details when
# switching to a different model.
output_dict = {
    # .item() pulls the single count out explicitly; calling int() directly on
    # an array with ndim > 0 is deprecated since NumPy 1.25.
    'num_detections': int(interpreter.get_tensor(output_details[3]["index"]).item()),
    'detection_classes': interpreter.get_tensor(output_details[1]["index"]).astype(np.uint8),
    'detection_boxes' : interpreter.get_tensor(output_details[0]["index"]),
    'detection_scores' : interpreter.get_tensor(output_details[2]["index"])
    }

# output_dict = {
#     'detection_classes': interpreter.get_tensor(output_details[1]["index"]).astype(np.uint8),
#     'detection_boxes' : interpreter.get_tensor(output_details[0]["index"])
#     }
print(output_dict)
# Get the output tensors
# box_predictor_output = interpreter.get_tensor(output_details[0]['index'])
# class_predictor_output = interpreter.get_tensor(output_details[1]['index'])

# # You may need to reshape the outputs to match the expected shape
# # The shape should be [batch, height, width, box_encoding * num_anchors] for box predictor
# # and [batch, height, width, num_classes * num_anchors] for class predictor
# batch_size, height, width, _ = box_predictor_output.shape  # Extract height and width
# print(batch_size,height ,width  )
# # The following code is assuming a specific arrangement of data
# # If your model's data layout is different, adjust accordingly

# # Reshape the box predictor output
# box_encoding_size = 4  # You mentioned [y_center, x_center, box_height, box_width]
# num_anchors = box_predictor_output.shape[-1] // box_encoding_size
# box_predictor_output = box_predictor_output.reshape((batch_size, height, width, box_encoding_size, num_anchors))

# # Reshape the class predictor output
# num_classes = 91  # Number of classes, including the background class
# num_anchors = class_predictor_output.shape[-1] // num_classes
# class_predictor_output = class_predictor_output.reshape((batch_size, height, width, num_classes, num_anchors))

# print(box_predictor_output)
# print(class_predictor_output)

# Draw the detections that pass the default score threshold.
# The result is displayed once, together with cv2.waitKey, at the end of the
# script, so no extra cv2.imshow call is needed here.
output_image = draw_boxes_on_image(input_image[0], output_dict, labels)
# # Load and preprocess the input image
# # input_image = cv2.imread("your_input_image.jpg")  # Replace with the path to your input image
# # input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

# # Loop through each anchor box and draw it on the image
# input_image = cv2.imread(image_name)  # Replace with the path to your input image
# input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

# for i in range(num_anchors):
#     box_coordinates = box_predictor_output[0, :, :, :, i]
#     class_scores = class_predictor_output[0, :, :, :, i]

#     # Threshold the class scores to identify objects
#     class_threshold = 0.5  # Adjust this threshold as needed
#     object_indices = np.where(class_scores > class_threshold)

#     for index in range(len(object_indices[0])):
#         y, x = object_indices[0][index], object_indices[1][index]
#         box = box_coordinates[y, x]

#         ymin, xmin, ymax, xmax = box
#         height, width, _ = input_image.shape
#         # print(height)

#         # Convert normalized coordinates to pixel values
#         xmin = int(xmin * width)
#         xmax = int(xmax * width)
#         ymin = int(ymin * height)
#         ymax = int(ymax * height)

#         # Draw bounding box
#         color = (0, 255, 0)  # Green
#         thickness = 2
#         cv2.rectangle(input_image, (xmin, ymin), (xmax, ymax), color, thickness)

#         # Get the class label
#         class_id = np.argmax(class_scores[y, x])
#         class_name = labels[class_id]

#         # Label with class name and score
#         label = f'{class_name} ({class_scores[y, x, class_id]:.2f})'
#         font = cv2.FONT_HERSHEY_SIMPLEX
#         font_scale = 0.6
#         font_thickness = 1
#         text_size, _ = cv2.getTextSize(label, font, font_scale, font_thickness)
#         cv2.rectangle(input_image, (xmin, ymin), (xmin + text_size[0], ymin - text_size[1] - 2), color, thickness=cv2.FILLED)
#         cv2.putText(input_image, label, (xmin, ymin - 2), font, font_scale, (0, 0, 0), font_thickness, lineType=cv2.LINE_AA)

# Display the image with bounding boxes and labels
cv2.imshow("Object Detection", output_image)
# Wait for a key press so the window stays open until the user dismisses it.
cv2.waitKey(0)
dev-techshlok commented 8 months ago

Hello @laxmareddyp

Any luck finding the solution?

Thank you

laxmareddyp commented 8 months ago

Hi @dev-techshlok ,

We will look into this and let you know asap.

Thanks.

dev-techshlok commented 8 months ago

Hello @laxmareddyp

Thank you, looking forward to it.

dev-techshlok commented 8 months ago

Any update?