Open gigasurgeon opened 10 months ago
And to infer from the ONNX model, I am using @HtwoOtwo's script from this comment ->https://github.com/facebookresearch/Detic/issues/107#issuecomment-1803014546
The slightly modified inference script looks like this ->
import argparse
import cv2
import numpy as np
import onnxruntime as ort
import time
class Detic():
    """ONNX Runtime wrapper around a Detic detection model exported with a
    custom vocabulary.

    Holds the inference session, the class-name vocabulary, and per-class
    drawing colors.
    """

    def __init__(self, modelpath, detection_width=800, confThreshold=0.8):
        """Load the ONNX model and set up inference parameters.

        Args:
            modelpath:       path to the exported .onnx file.
            detection_width: target size used by preprocess() when scaling
                             the input image.
            confThreshold:   minimum score for a detection to be kept.
        """
        providers = ['CUDAExecutionProvider']
        self.session = ort.InferenceSession(modelpath, providers=providers)
        model_inputs = self.session.get_inputs()
        self.input_name = model_inputs[0].name
        self.max_size = detection_width
        self.confThreshold = confThreshold
        # Custom vocabulary baked into the exported model, in class-id order.
        # NOTE(review): 'skimmer' appears twice in this list — presumably it
        # mirrors the list used at export time, so it is kept verbatim.
        self.class_names = ['scoop', 'teaspoon', 'spoon', 'tea_spoon', 'flatware', 'tong', 'coffee_spoon', 'soupspoon', 'soup_spoon', 'spatula',
                            'ladle', 'skimmer', 'bowl', 'egg_bowl', 'sugar_bowl', 'washing_bowl', 'salad_bowl', 'cereal_bowl', 'soup_bowl', 'saucepan',
                            'frying_pan', 'pan', 'cake_pan', 'sauce_pan', 'content_pan', 'wok', 'saucer', 'plate', 'chinaware', 'glass', 'wine_glass',
                            'chalice', 'dixie_cup', 'flute_glass', 'shot_glass', 'wineglass', 'milk_bottle', 'bottle', 'water_bottle', 'wine_bottle',
                            'beer_bottle', 'tea_pot', 'pot', 'pressure_pot', 'pasta_pot', 'plastic_pot', 'sauce_pot', 'teapot', 'crock_pot', 'crockpot',
                            'cup', 'measuring_cup', 'coffee_cup', 'mug', 'teacup', 'tea_cup', 'pitcher', 'coffee_jar', 'sugar_jar', 'honey_jar', 'jar',
                            'jug', 'coffeepot', 'kettle', 'water_jug', 'urn', 'cream_pitcher', 'coffee_pot', 'container', 'lunch_box', 'sugar_container',
                            'milk_container', 'rice_container', 'sauce_container', 'food_container', 'casserole', 'knife', 'steak_knife', 'knife_sharpener',
                            'lime_squeezer', 'peeler', 'grater', 'skimmer', 'cheese_grater', 'masher', 'squeezer', 'potato_peeler', 'lime_juicer', 'scissor',
                            'tray', 'baking_tray', 'pizza_tray', 'baking_pan', 'serving_board', 'eating_board', 'chopping_board', 'cut_board', 'cutting_board',
                            'board', 'pasta_strainer', 'strainer', 'mesh_strainer', 'can', 'beer_can', 'milk_can', 'canister', 'wine_bucket', 'bucket', 'plastic_bucket']
        # One random color per class so drawing code may index by class id.
        # BUG FIX: the original allocated only 4 colors, which would raise
        # IndexError for any class id >= 4 if colors were indexed per class
        # (as the commented-out line in draw_predictions intends).
        self.assigned_colors = np.random.randint(0, high=256, size=(len(self.class_names), 3)).tolist()
def preprocess(self, srcimg):
    """Convert a BGR image to RGB and resize it to the network input size.

    Args:
        srcimg: HxWx3 BGR image (as returned by cv2.imread).

    Returns:
        The RGB image resized to the fixed network input resolution.
    """
    im_h, im_w, _ = srcimg.shape
    dstimg = cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB)
    # Shortest-edge scaling: scale so the shorter side equals self.max_size.
    if im_h < im_w:
        scale = self.max_size / im_h
        oh, ow = self.max_size, scale * im_w
    else:
        scale = self.max_size / im_w
        oh, ow = scale * im_h, self.max_size
    # Cap the longer side at self.max_size as well.
    # NOTE(review): because the shorter side was just set to self.max_size,
    # this branch fires for every non-square image and shrinks the shorter
    # side below self.max_size. detectron2's ResizeShortestEdge uses a
    # separate, larger cap for the long side (e.g. 1333) — presumably that
    # was the intent here. TODO confirm against the export config.
    max_hw = max(oh, ow)
    if max_hw > self.max_size:
        scale = self.max_size / max_hw
        oh *= scale
        ow *= scale
    ow = int(ow + 0.5)
    oh = int(oh + 0.5)
    # NOTE(review): the computed (ow, oh) is ignored and the image is resized
    # to a fixed 1067x800 — likely because the ONNX model was exported via
    # tracing with a fixed input shape (cv2.resize takes (width, height)).
    # If the model accepts dynamic shapes this should be
    # cv2.resize(dstimg, (ow, oh)). TODO confirm before changing.
    dstimg = cv2.resize(dstimg, (1067, 800))
    return dstimg
def suppress_overlapping_bboxes(self, pred_boxes, scores, pred_classes, pred_masks):
    """Collapse detections that share the exact same integer box coordinates.

    Boxes are cast to int64 first; among detections whose four coordinates
    are all identical, only the one with the highest score is kept.
    Surviving detections stay in first-occurrence order.

    Args:
        pred_boxes:   (N, 4) boxes in (x0, y0, x1, y1) order.
        scores:       (N,) confidence scores.
        pred_classes: (N,) integer class ids.
        pred_masks:   (N, ...) masks aligned with the boxes.

    Returns:
        Tuple (pred_boxes, scores, pred_classes, pred_masks) with exact
        duplicates removed.
    """
    pred_boxes = pred_boxes.astype(np.int64)
    # Map "x0_y0_x1_y1" -> index of the best-scoring detection for that box.
    coord_str_dict = {}
    for i in range(pred_boxes.shape[0]):
        coord_str = f'{pred_boxes[i][0]}_{pred_boxes[i][1]}_{pred_boxes[i][2]}_{pred_boxes[i][3]}'
        if coord_str not in coord_str_dict:
            coord_str_dict[coord_str] = i
        # BUG FIX: the original compared scores[i] against the stored *index*
        # (scores[i] > coord_str_dict[coord_str]) rather than the stored
        # score, so e.g. whenever the stored index was 0 the last duplicate
        # always won regardless of its score.
        elif scores[i] > scores[coord_str_dict[coord_str]]:
            coord_str_dict[coord_str] = i
    # Fancy indexing keeps the per-array shapes sane even when N == 0.
    keep = list(coord_str_dict.values())
    return pred_boxes[keep], scores[keep], pred_classes[keep], pred_masks[keep]
def post_processing(self, pred_boxes, scores, pred_classes, pred_masks, im_hw, pred_hw):
scale_x, scale_y = (im_hw[1] / pred_hw[1], im_hw[0] / pred_hw[0])
pred_boxes[:, 0::2] *= scale_x
pred_boxes[:, 1::2] *= scale_y
pred_boxes[:, [0, 2]] = np.clip(pred_boxes[:, [0, 2]], 0, im_hw[1])
pred_boxes[:, [1, 3]] = np.clip(pred_boxes[:, [1, 3]], 0, im_hw[0])
threshold = 0
widths = pred_boxes[:, 2] - pred_boxes[:, 0]
heights = pred_boxes[:, 3] - pred_boxes[:, 1]
keep = (widths > threshold) & (heights > threshold)
pred_boxes = pred_boxes[keep]
scores = scores[keep]
pred_classes = pred_classes[keep]
pred_masks = pred_masks[keep]
# mask_threshold = 0.5
# pred_masks = paste_masks_in_image(
# pred_masks[:, 0, :, :], pred_boxes,
# (im_hw[0], im_hw[1]), mask_threshold
# )
threshold = 0.5
idx = scores>threshold
scores = scores[idx]
pred_boxes = pred_boxes[idx]
pred_classes = pred_classes[idx]
pred_masks = pred_masks[idx]
pred_boxes, scores, pred_classes, pred_masks = self.suppress_overlapping_bboxes(pred_boxes, scores, pred_classes, pred_masks)
pred = {
'pred_boxes': pred_boxes,
'scores': scores,
'pred_classes': pred_classes,
'pred_masks': pred_masks,
}
# print(pred)
# exit()
return pred
def draw_predictions(self, img, predictions):
    """Draw each detection box and a "x0_y0_x1_y1 score class" label on img.

    Mutates img in place and returns it for convenience.
    """
    height, width = img.shape[:2]
    # Thickness scales with image area, floored at 10.
    default_font_size = int(max(np.sqrt(height * width) // 90, 10))

    boxes = predictions["pred_boxes"].astype(np.int64)
    scores = predictions["scores"]
    classes_id = predictions["pred_classes"].tolist()

    num_instances = len(boxes)
    print('detect', num_instances, 'instances')

    for box, score, cls_id in zip(boxes, scores, classes_id):
        x0, y0, x1, y1 = box
        color = [0, 255, 0]  # fixed green for every instance
        cv2.rectangle(img, (x0, y0), (x1, y1), color=color, thickness=default_font_size // 4)
        text = f"{x0}_{y0}_{x1}_{y1} {round(score, 2)} {self.class_names[cls_id]}"
        print(text)
        cv2.putText(img, text, (x0, y0 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness=1, lineType=cv2.LINE_AA)
    return img
def detect(self, srcimg):
    """Run the full pipeline on one BGR image: preprocess, ONNX forward
    pass, and post-processing back to image coordinates.

    Returns the prediction dict produced by post_processing().
    """
    src_h, src_w = srcimg.shape[:2]
    net_img = self.preprocess(srcimg)
    net_hw = net_img.shape[:2]
    # HWC -> CHW float32, as the exported model expects (no batch axis).
    blob = net_img.transpose(2, 0, 1).astype(np.float32)
    pred_boxes, pred_classes, pred_masks, scores, _ = self.session.run(None, {self.input_name: blob})
    return self.post_processing(pred_boxes, scores, pred_classes, pred_masks, (src_h, src_w), net_hw)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--imgpath", default="desk.jpg", type=str, help="image path")
    parser.add_argument("--confThreshold", default=0.5, type=float, help='class confidence')
    parser.add_argument("--modelpath", type=str, default='onnx_models/model_custom_vocabulary.onnx', help="onnxmodel path")
    args = parser.parse_args()

    mynet = Detic(args.modelpath, confThreshold=args.confThreshold)
    srcimg = cv2.imread(args.imgpath)
    # cv2.imread returns None (no exception) for a missing/unreadable path;
    # fail fast with a clear message instead of crashing inside preprocess.
    if srcimg is None:
        raise SystemExit(f'could not read image: {args.imgpath}')

    # Time the detection; the loop count can be raised for a steadier average.
    fpses = []
    for i in range(1):
        print(i)
        t1 = time.time()
        preds = mynet.detect(srcimg)
        t2 = time.time()
        fps = 1 / (t2 - t1)
        fpses.append(fps)
    avg_fps = sum(fpses) / len(fpses)
    print(f'avg_fps: {round(avg_fps, 2)}')

    result = mynet.draw_predictions(srcimg, preds)
    cv2.imwrite('result_onnx.jpg', result)
Hello @gigasurgeon, thanks for the tutorial. Would you be able to upload directly the onnx file you produced with the script? Thank you so much
Hello @gigasurgeon, thanks for the tutorial. Would you be able to upload directly the onnx file you produced with the script? Thank you so much
Here's the ONNX file -> https://drive.google.com/file/d/1hYz19lZk4ugLrUGO0HIP9M2RbXs5A4O-/view?usp=sharing
I wanted to share the method to export detic model to ONNX format with custom vocabulary.
Step 1) First of all, comment out this line
box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages)
in custom_rcnn.py. Step 2) Also, according to this comment https://github.com/facebookresearch/Detic/issues/107#issuecomment-1752039648 , you have to comment out the nms_and_topk line in centernet while exporting the model
boxlists = self.nms_and_topK(boxlists, nms=not self.not_nms)
Step 3) Now on to the main part. You need to modify this file ->
Detic/detectron2/tools/deploy/export_model.py
This is the final script I had
At line 253
custom_classes = ['scoop', ....
is where I have added my custom labels. Step 4) Now you need to execute this script with the command
python3 detectron2/tools/deploy/export_model_lvis_vocabulary.py --config-file configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml --sample-image desk.jpg --output ./output --export-method tracing --format onnx MODEL.WEIGHTS models/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.pth MODEL.DEVICE cuda
from Detic's root folder. This will save the ONNX model at output/model.onnx
.