Closed shivprasad94 closed 3 years ago
The following should work:
def load_custom_model(model_name):
    """Load an exported detection model.

    Args:
        model_name: Path of the exported model directory; the SavedModel is
            read from its ``saved_model`` sub-directory.

    Returns:
        The object returned by ``tf.saved_model.load``.
    """
    saved_model_dir = pathlib.Path(model_name) / "saved_model"
    return tf.saved_model.load(str(saved_model_dir))
# Load the exported detection model.
model_name = 'exported-models/my_model'
detection_model = load_custom_model(model_name)

# Label map: used to turn detected class ids into category names.
PATH_TO_LABELS = 'training/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS, use_display_name=True)

# Every .jpg in the test folder, in sorted order.
PATH_TO_TEST_IMAGES_DIR = pathlib.Path('test_images')
TEST_IMAGE_PATHS = sorted(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg"))
def run_inference_for_single_image(model, image, threshold=0.5,
                                   label_to_look_for=None,
                                   output_directory='.'):
    """Run the detector on one image and optionally save crops of matches.

    Args:
        model: Loaded model exposing a ``serving_default`` signature.
        image: The image as an array (anything ``np.asarray`` accepts).
        threshold: Detections whose score is below this value are never
            cropped. Has no effect on the returned dictionary.
        label_to_look_for: Category name whose boxes are cropped and saved.
            ``None`` (the default) disables saving, so no file is touched.
        output_directory: Directory receiving ``images/<n>.jpg`` and the
            ``results.csv`` log when saving is enabled.

    Returns:
        dict mapping output names to numpy arrays with the batch dimension
        removed, plus ``num_detections`` (int) and, for models that output
        masks, ``detection_masks_reframed``.
    """
    image = np.asarray(image)
    # The input needs to be a tensor, and the model expects a batch of
    # images, so add a leading axis with `tf.newaxis`.
    input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]

    # Run inference.
    model_fn = model.signatures['serving_default']
    output_dict = model_fn(input_tensor)

    # All outputs are batched tensors. Convert to numpy arrays and take
    # index [0] to remove the batch dimension; only the first
    # num_detections entries are of interest.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {key: value[0, :num_detections].numpy()
                   for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    output_dict['detection_classes'] = (
        output_dict['detection_classes'].astype(np.int64))

    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox masks to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                           tf.uint8)
        output_dict['detection_masks_reframed'] = (
            detection_masks_reframed.numpy())

    if label_to_look_for is not None:
        _save_matching_crops(image, output_dict, threshold,
                             label_to_look_for, output_directory)

    return output_dict


def _save_matching_crops(image, output_dict, threshold, label_to_look_for,
                         output_directory):
    """Save a crop for each confident box of the wanted label and log it."""
    # Boxes are normalized (ymin, xmin, ymax, xmax); scale them with the
    # real image size instead of relying on externally defined globals.
    image_height, image_width = image.shape[:2]

    boxes = []
    for index, score in enumerate(output_dict['detection_scores']):
        if score < threshold:
            continue
        label = category_index[output_dict['detection_classes'][index]]['name']
        if label != label_to_look_for:
            continue
        ymin, xmin, ymax, xmax = output_dict['detection_boxes'][index]
        boxes.append((int(xmin * image_width), int(ymin * image_height),
                      int(xmax * image_width), int(ymax * image_height)))
    if not boxes:
        return

    images_dir = os.path.join(output_directory, 'images')
    os.makedirs(images_dir, exist_ok=True)

    # Read and write the log at the same location so entries accumulate
    # across calls and crop file names keep increasing.
    results_path = os.path.join(output_directory, 'results.csv')
    if os.path.exists(results_path):
        df = pd.read_csv(results_path)
    else:
        df = pd.DataFrame(columns=['timestamp', 'img_path'])

    # PIL interprets the array as RGB, so it is used as is; converting to
    # BGR first would swap the red and blue channels in the saved crop.
    full_image = Image.fromarray(image)
    for box in boxes:
        file_path = os.path.join(images_dir, str(len(df)) + '.jpg')
        full_image.crop(box).save(file_path, "JPEG")
        df.loc[len(df)] = [datetime.datetime.now(), file_path]
    df.to_csv(results_path, index=False)
def show_inference(model, image_path):
    """Run detection on the image at `image_path` and display the result.

    Args:
        model: Detection model handed to `run_inference_for_single_image`.
        image_path: Path of the image file to open.
    """
    # Keep the array form of the image: it is what the result image with
    # boxes and labels is prepared from.
    frame = np.array(Image.open(image_path))

    # Actual detection.
    detections = run_inference_for_single_image(model, frame)

    # Visualization of the results of the detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        detections['detection_boxes'],
        detections['detection_classes'],
        detections['detection_scores'],
        category_index,
        instance_masks=detections.get('detection_masks_reframed'),
        use_normalized_coordinates=True,
        line_thickness=8)

    display(Image.fromarray(frame))
# Run detection on every test image and display each annotated result.
for image_path in TEST_IMAGE_PATHS:
    show_inference(detection_model, image_path)
Don't forget to import the needed libraries. If it doesn't work let me know.
Hey @TannerGilbert, thanks. I tried the above code snippet, but it's not working as expected. I even tried printing the output list with labels and coordinates, but it looks like it's an empty list because the code never gets past the `if score < threshold:` check.
But most importantly, I am able to see bounding boxes on my test images with your code snippet. It looks like the values ymin, xmin, ymax, xmax are not getting extracted and stored in output[].
# Get data(label, xmin, ymin, xmax, ymax)
output = []
# NOTE(review): every score is compared against 20 below. If detection_scores
# are confidences in the 0-1 range (TODO confirm), `score < threshold` is true
# for every detection, the `continue` always fires and output stays empty.
threshold=20 # I have set my threshold as 20
output_directory = 'cropped' # folder in which the results are saved
label_to_look_for = 'num' # label name of the boxes to keep
for index, score in enumerate(output_dict['detection_scores']):
if score < threshold:
continue
print('extracting index and label') # reported as never being printed
label = category_index[output_dict['detection_classes'][index]]['name']
ymin, xmin, ymax, xmax = output_dict['detection_boxes'][index]
output.append((label, int(xmin * image_width), int(ymin * image_height), int(xmax * image_width), int(ymax * image_height)))
print(output)
Because of this, I tried removing the `if score < threshold` check and replaced the loop with the snippet below:
# Same loop without the score check: every entry of detection_scores now
# produces a (label, xmin, ymin, xmax, ymax) tuple in output.
output = []
for index, score in enumerate(output_dict['detection_scores']):
label = category_index[output_dict['detection_classes'][index]]['name']
ymin, xmin, ymax, xmax = output_dict['detection_boxes'][index]
output.append((label, int(xmin * image_width), int(ymin * image_height), int(xmax * image_width), int(ymax * image_height)))
and ended up with the below error
NameError: name 'image_width' is not defined
so I defined values for both variable based on my input test image size 1920x1080
# Hard-coded for 1920x1080 test images.
# NOTE(review): presumably these could be read from the image array's shape
# instead, so other image sizes also work — confirm.
image_width = 1920
image_height = 1080
and then ended up with the below error
51 # Save incident (could be extended to send a email or something)
52 for l, x_min, y_min, x_max, y_max in output:
53 ---> array = cv2.cvtColor(np.array(image_show), cv2.COLOR_RGB2BGR)
54 image = Image.fromarray(array)
55 cropped_img = image.crop((x_min, y_min, x_max, y_max))
NameError: name 'image_show' is not defined
So had to replace it with 'image' and fixed the above errors.
With all these changes it ran successfully, but I ended up with almost 99 cropped images in my file directory.
As I understand it, output.append adds the coordinates of detections 0 to 99, which is why 0-99 different cropped images are stored in the directory. How can I get only the best one, with the highest confidence score?
I am not sure, but is output[0] the best coordinate? Instead of looping over the whole list, would it be better to just use output[0]?
I just want 1 best image shown in the bounding box or maybe an image stored in some variable so that I can pass it to opensource OCR tools like PyTesseract and extract text data out of it
So I finally updated my code to the version below, extracting the coordinates from output[0]:
# Tail of the inference function: collect a (label, xmin, ymin, xmax, ymax)
# tuple for every detection (no score filtering), then crop a single box.
output = []
# Hard-coded image size used to scale the normalized box coordinates.
image_width = 1920
image_height = 1080
label_to_look_for ='num'
output_directory = 'cropped'
for index, score in enumerate(output_dict['detection_scores']):
label = category_index[output_dict['detection_classes'][index]]['name']
ymin, xmin, ymax, xmax = output_dict['detection_boxes'][index]
output.append((label, int(xmin * image_width), int(ymin * image_height), int(xmax * image_width), int(ymax * image_height)))
# Save incident (could be extended to send a email or something)
#for l, x_min, y_min, x_max, y_max in output:
# NOTE(review): `label` here is whatever the loop above assigned most
# recently, while the crop below always uses output[0]; the label that is
# tested and the box that is cropped may belong to different detections.
if label == label_to_look_for:
print(output[0])
# output[0] is the first detection. NOTE(review): it is the highest-scoring
# one only if the model returns detections sorted by score — TODO confirm.
x_min=output[0][1]
y_min=output[0][2]
x_max=output[0][3]
y_max=output[0][4]
array = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
image = Image.fromarray(array)
cropped_img = image.crop((x_min, y_min, x_max, y_max))
# The crop is named after the current number of rows in df (df is defined
# outside this excerpt) and logged with a timestamp.
file_path = output_directory+'/images/'+str(len(df))+'.jpg'
cropped_img.save(file_path, "JPEG", icc_profile=cropped_img.info.get('icc_profile'))
df.loc[len(df)] = [datetime.datetime.now(), file_path]
df.to_csv(output_directory+'/results.csv', index=None)
print(output[0])
return output_dict
Is output[0] the best coordinate?
Sorry for not replying for such a long time. I finally had the time to write a working example. The example works with a webcam, but it should be simple to rewrite it to work with images instead if you want to.
I'll close this issue for now. If you have any further questions feel free to ask.
I am still confused about extracting bounding boxes as separate images. Where exactly do I need to use your code from the commonly asked questions? Can you please elaborate?
Below are the functions I am using; I am testing them on the test images folder.