WongKinYiu / yolov7

Implementation of paper - YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors

Yolov7 on Jetson Orin - Slow performance #1610

Open · cmalliou opened this issue 1 year ago

cmalliou commented 1 year ago

I'm using the following code on a Jetson Orin developer kit, and with a custom dataset the detection time is approximately 1.5 s per frame, even though inference runs on the GPU. The OS on the Orin is Ubuntu 20.04. Could you please let me know if there is anything else that could help with the detection time?
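One way to narrow this down is to time the detector alone, with proper CUDA synchronization, and compare it against the full loop below. A minimal sketch, assuming a PyTorch checkpoint loaded through the repo's `attempt_load` helper ("yolov7.pt" is a placeholder for the custom weights; the `ObjectDetection` wrapper in the code below may load the model differently):

```python
import time

import torch

from models.experimental import attempt_load  # yolov7 repo helper

device = torch.device("cuda")
model = attempt_load("yolov7.pt", map_location=device)  # placeholder weights path
model.eval()

# Dummy frame at the 416x416 input size used below
img = torch.zeros(1, 3, 416, 416, device=device)

with torch.no_grad():
    # Warm-up: the first CUDA calls pay one-off kernel and allocator costs
    for _ in range(10):
        model(img)

    torch.cuda.synchronize()  # CUDA is asynchronous; sync before and after timing
    t0 = time.time()
    for _ in range(50):
        model(img)
    torch.cuda.synchronize()

print("model-only: %.1f ms/frame" % ((time.time() - t0) / 50 * 1000))
```

If the model-only figure is far below 1.5 s, the time is going into the surrounding pipeline (the per-frame `np.save`, colour conversion, the Deep SORT encoder) rather than into YOLOv7 itself.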

```python

class Yolo7CameraDetectorService(CameraDetectorService):

    def __init__(self):
        super().__init__()

        # Dictionary of items currently being tracked:
        #   key:   tracker object id (int)
        #   value: timestamp of the last update (camera-exit timestamp)
        self.__detected_items = dict()

    def detect(self, camera: CameraRunningContext):
        """
        Detect items using YOLOv7.

        Items that remain in the current frame are kept in the detected-items
        dictionary; all other items are removed and written to the camera
        measurements buffer.
        """

        # Call the abstract method
        super().detect(camera)

        # Counting area ROI
        yellow_poly_pts = np.array([[0, 150], [1440, 150], [1440, 850], [0, 850]])

        # Define the codec for video output
        fourcc = cv2.VideoWriter_fourcc(*'XVID')

        # Load the object detection model
        # ATTENTION: ObjectDetection applies toLower() on the path
        path_weights = Path(camera.camera.detection_weights_path)

        od = ObjectDetection(path_weights)
        od.load_detection_model(image_size=416, confThreshold=0.5, nmsThreshold=0.4)
        od.load_class_names(camera.camera.detection_classes_names)

        # Load the Deep SORT object tracker
        deep = Deep(max_distance=0.8,
                    nms_max_overlap=0.3,
                    n_init=3,
                    max_age=5,
                    max_iou_distance=0.7)

        tracker = deep.sort_tracker()

        # Run while the camera status is enabled, streaming and detecting
        while camera.status is RunningCameraStatus.ENABLED_AND_STREAMING_AND_DETECTING:

            now = datetime.utcnow()

            # Get the current raw image frame
            image = camera.get_cur_raw_image()

            # Convert to an RGB image
            rgb = image.convert("RGB")

            # Convert to a numpy array
            array = rgb.get_numpy_array()
            #out = cv2.VideoWriter('/home/christina/Desktop/yv7run/output.avi', fourcc, 20.0, (1080, 1080))

            # OpenCV works in BGR, so convert the frame before processing
            array = cv2.cvtColor(array, cv2.COLOR_BGR2RGB)

            # NOTE: debug only -- this writes the full frame to disk on every
            # iteration, which by itself adds noticeable per-frame latency
            np.save("/home/christina/Desktop/test.np", array)

            logging.info("Frame grab and colour conversion, elapsed ms=" +
                         str((datetime.utcnow() - now).total_seconds() * 1000))

            """ 1. Object Detection """
            (class_ids, scores, boxes) = od.detect(array)
            logging.info("Object detection done, elapsed ms=" +
                         str((datetime.utcnow() - now).total_seconds() * 1000))

            """ 2. Object Tracking """
            features = deep.encoder(array, boxes)
            detections = deep.Detection(boxes, scores, class_ids, features)

            logging.info("Feature encoding done, elapsed ms=" +
                         str((datetime.utcnow() - now).total_seconds() * 1000))

            # Draw a yellow polygon around the ROI
            cv2.polylines(array, [yellow_poly_pts], True, (0, 255, 255), 2)

            tracker.predict()
            (class_ids, object_ids, boxes) = tracker.update(detections)

            logging.info("Detection and tracking done, elapsed ms=" +
                         str((datetime.utcnow() - now).total_seconds() * 1000))

            # Iterate over the tracker results
            for class_id, object_id, box in zip(class_ids, object_ids, boxes):

                (x, y, x2, y2) = box
                class_name = od.names[class_id]
                color = od.colors[class_id]

                # Center of the detected box
                cx = int((x + x2) / 2)
                cy = int((y + y2) / 2)

                cv2.rectangle(array, (x, y), (x2, y2), (0, 0, 255), 2)
                cv2.rectangle(array, (x, y), (x + len(class_name) * 20, y - 30), (0, 0, 255), -1)
                cv2.putText(array, class_name + " " + str(object_id), (x, y - 10), 0, 0.75, (255, 255, 255), 2)
                cv2.circle(array, (cx, cy), 4, (0, 0, 255), -1)

                """ 3. Object Counting """

                # Check whether the object center is inside the ROI
                result = cv2.pointPolygonTest(yellow_poly_pts, (int(cx), int(cy)), False)
                #print(result, object_id, datetime.utcnow())

                if result > 0:
                    # Store the item in the dictionary as a single camera measurement
                    # (note: naive local time here, while the timing code above uses utcnow)
                    self.__detected_items[object_id] = CameraSingleMeasurementValue(value=1, timestamp=datetime.now())
                    print("FOUND ONE!!!", result)
                    #cv2.imwrite("/home/christina/Desktop/yv7run/Img005.png", array)
                    print("Done with image")

                #out.write(array)

            # Show text with counter (disabled)
            #od_count = len(obs_counted)
            #cv2.putText(array, "Number of Objects {}".format(od_count), (20, 20), 0, 1, (0, 255, 0), 2)

            # Display (disabled)
            #imS = cv2.resize(array, (960, 540))
            #cv2.imshow("Frame", imS)
            #cv2.waitKey(0)
            #out.release()

            # Outside the loop over tracks: remove every item that is not in the
            # current object id list (the items detected in this frame were added
            # to the dictionary above)
            camera_measurement_values = self.__remove_and_get_all_except(object_ids)

            # Add the values to the buffers
            self.camera_buffered_measurements_service.add_multi(camera.camera.crguid, camera_measurement_values)

            logging.info("Total elapsed ms=" +
                         str((datetime.utcnow() - now).total_seconds() * 1000))

"""
Remove and retutn all items ids from 
"""
def __remove_and_get_all_except(self, object_ids) -> List[CameraSingleMeasurementValue]:

    # Get all dictionary keys that does not exist in the current frame detected objects
    keys = [key for key in self.__detected_items.keys() if key not in object_ids]

    # Get values for the above keys
    values = [self.__detected_items.get(key) for key in keys]

    # Remove above keys form dictionary
    for key in keys: del self.__detected_items[key]

    # Dictionary contains only detected items that are not exited camera and are still trackable
    # Return items that exits camera context
    return values

```
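A few things commonly help on Jetson boards, offered as suggestions rather than a confirmed fix: lock the board to its maximum power profile (`sudo nvpmodel -m 0` followed by `sudo jetson_clocks`), run inference in FP16, and consider the TensorRT route (this repo's `export.py` can produce an ONNX model, which `trtexec --fp16` can build into an engine). A minimal FP16 sketch, again assuming the `attempt_load` path rather than the `ObjectDetection` wrapper above:

```python
import torch

from models.experimental import attempt_load  # yolov7 repo helper

device = torch.device("cuda")
model = attempt_load("yolov7.pt", map_location=device)  # placeholder weights path
model.half().eval()  # FP16: Orin's GPU has fast half-precision paths

# Input dtype must match the model dtype
img = torch.zeros(1, 3, 416, 416, device=device).half()

with torch.no_grad():
    pred = model(img)[0]  # first element is the inference output
```

Half precision changes the numerics slightly, so the confidence and NMS thresholds are worth a quick sanity check afterwards; the TensorRT engine usually gives the largest gain on Orin but has to be rebuilt whenever the weights change.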

suleymanVR commented 1 year ago

@cmalliou I am also trying to run detection with YOLOv7 using an external camera, and I see the same latency problem on my system. Were you able to fix it?