sunmooncode closed this issue 2 years ago
video_depth_estimation.py
import cv2
import pafy
import numpy as np
import glob
from crestereo import CREStereo, CameraConfig
# Initialize video
# cap = cv2.VideoCapture("video.mp4")
videoUrl = 'https://youtu.be/Yui48w71SG0'
start_time = 0 # skip first {start_time} seconds
videoPafy = pafy.new(videoUrl)
print(videoPafy.streams)
cap = cv2.VideoCapture(videoPafy.streams[-1].url)
# cap.set(cv2.CAP_PROP_POS_FRAMES, start_time*30)
# Model options (not all options supported together)
iters = 5 # Fewer iterations run faster but produce less detail.
# Options: 2, 5, 10, 20
input_shape = (320, 480) # Input resolution.
# Options: (240,320), (320,480), (380, 480), (360, 640), (480,640), (720, 1280)
version = "combined" # The combined version does 2 passes, one to get an initial estimation and a second one to refine it.
# Options: "init", "combined"
# Camera options: baseline (m), focal length (pixels) and max distance
# TODO: Replace these values with the correct configuration for YOUR camera (see the worked example after this script)
camera_config = CameraConfig(0.12, 0.5*input_shape[1]/0.72)
max_distance = 10
# Initialize model
model_path = f'models/crestereo_{version}_iter{iters}_{input_shape[0]}x{input_shape[1]}.onnx'
depth_estimator = CREStereo(model_path, camera_config=camera_config, max_dist=max_distance)
cv2.namedWindow("Estimated depth", cv2.WINDOW_AUTOSIZE)
while cap.isOpened():

    try:
        # Read frame from the video
        ret, frame = cap.read()
        if not ret:
            break
    except:
        continue

    # Extract the left and right images
    left_img = frame[:, :frame.shape[1]//3]
    right_img = frame[:, frame.shape[1]//3:frame.shape[1]*2//3]
    color_real_depth = frame[:, frame.shape[1]*2//3:]

    # Estimate the depth
    disparity_map = depth_estimator(left_img, right_img)
    color_depth = depth_estimator.draw_depth()

    combined_image = np.hstack((left_img, color_real_depth, color_depth))

    cv2.imshow("Estimated depth", combined_image)

    # Press key q to stop
    if cv2.waitKey(1) == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
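For reference, CameraConfig takes the stereo baseline in meters and the focal length in pixels, and crestereo.py converts disparity to depth as depth = f * baseline / disparity. A quick sanity check of that formula with the numbers used above (illustrative values only, not a real calibration for any particular camera):

# Hypothetical sanity check of the depth formula used in crestereo.py,
# using the illustrative values from this script (not a real calibration).
baseline = 0.12                      # meters
f = 0.5 * 480 / 0.72                 # ~333 pixels for a 480-px-wide input
disparity = 20                       # pixels
depth = f * baseline / disparity     # ~2.0 meters
print(f"depth ~ {depth:.2f} m")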
crestereo.py
from dataclasses import dataclass
import cv2
import numpy as np
import onnxruntime
import os
from performance_monitor import *
@dataclass
class CameraConfig:
    baseline: float
    f: float

DEFAULT_CONFIG = CameraConfig(0.546, 120)  # rough estimate from the original calibration
class CREStereo():

    def __init__(self, model_path, camera_config=DEFAULT_CONFIG, max_dist=10):
        self.initialize_model(model_path, camera_config, max_dist)

    def __call__(self, left_img, right_img):
        return self.update(left_img, right_img)

    def initialize_model(self, model_path, camera_config=DEFAULT_CONFIG, max_dist=10):
        self.camera_config = camera_config
        self.max_dist = max_dist

        # Initialize model session
        self.session = onnxruntime.InferenceSession(
            model_path, providers=[
                (
                    'TensorrtExecutionProvider', {
                        'trt_engine_cache_enable': True,
                        'trt_engine_cache_path': os.path.dirname(model_path),
                        'trt_fp16_enable': True,
                    }
                ),
                'CUDAExecutionProvider',
                'CPUExecutionProvider'
            ]
        )

        # Get model info
        self.get_input_details()
        self.get_output_details()

        # Check if the model has init flow
        self.has_flow = len(self.input_names) > 2
    def update(self, left_img, right_img):
        self.img_height, self.img_width = left_img.shape[:2]

        left_tensor = self.prepare_input(left_img)
        right_tensor = self.prepare_input(right_img)

        # Get the half resolution to calculate flow_init
        if self.has_flow:
            left_tensor_half = self.prepare_input(left_img, half=True)
            right_tensor_half = self.prepare_input(right_img, half=True)

            # Estimate the disparity map
            outputs = self.inference_with_flow(left_tensor_half, right_tensor_half,
                                               left_tensor, right_tensor)
        else:
            # Estimate the disparity map
            outputs = self.inference_without_flow(left_tensor, right_tensor)

        self.disparity_map = self.process_output(outputs)

        # Estimate depth map from the disparity
        self.depth_map = self.get_depth_from_disparity(self.disparity_map, self.camera_config)

        return self.disparity_map

    def prepare_input(self, img, half=False):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if half:
            img_input = cv2.resize(img, (self.input_width//2, self.input_height//2))
        else:
            img_input = cv2.resize(img, (self.input_width, self.input_height))

        img_input = img_input.transpose(2, 0, 1)
        img_input = img_input[np.newaxis, :, :, :]

        return img_input.astype(np.float32)
    @performance
    def inference_without_flow(self, left_tensor, right_tensor):
        return self.session.run(self.output_names, {self.input_names[0]: left_tensor,
                                                    self.input_names[1]: right_tensor})[0]

    @performance
    def inference_with_flow(self, left_tensor_half, right_tensor_half, left_tensor, right_tensor):
        return self.session.run(self.output_names, {self.input_names[0]: left_tensor_half,
                                                    self.input_names[1]: right_tensor_half,
                                                    self.input_names[2]: left_tensor,
                                                    self.input_names[3]: right_tensor})[0]

    def process_output(self, output):
        return np.squeeze(output[:, 0, :, :])

    @staticmethod
    def get_depth_from_disparity(disparity_map, camera_config):
        return camera_config.f*camera_config.baseline/disparity_map
    def draw_disparity(self):
        disparity_map = cv2.resize(self.disparity_map, (self.img_width, self.img_height))
        norm_disparity_map = 255*((disparity_map-np.min(disparity_map))/
                                  (np.max(disparity_map)-np.min(disparity_map)))
        return cv2.applyColorMap(cv2.convertScaleAbs(norm_disparity_map, 1), cv2.COLORMAP_MAGMA)

    def draw_depth(self):
        return self.util_draw_depth(self.depth_map, (self.img_width, self.img_height), self.max_dist)

    @staticmethod
    def util_draw_depth(depth_map, img_shape, max_dist):
        norm_depth_map = 255*(1-depth_map/max_dist)
        norm_depth_map[norm_depth_map < 0] = 0
        norm_depth_map[norm_depth_map >= 255] = 0
        norm_depth_map = cv2.resize(norm_depth_map, img_shape)
        return cv2.applyColorMap(cv2.convertScaleAbs(norm_depth_map, 1), cv2.COLORMAP_MAGMA)
    def get_input_details(self):
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
        self.input_shape = model_inputs[-1].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
        self.output_shape = model_outputs[0].shape
if __name__ == '__main__':
    from imread_from_url import imread_from_url

    # Initialize model
    model_path = '../models/crestereo_combined_iter10_360x640.onnx'
    depth_estimator = CREStereo(model_path)

    # Load images
    left_img = imread_from_url("https://vision.middlebury.edu/stereo/data/scenes2003/newdata/cones/im2.png")
    right_img = imread_from_url("https://vision.middlebury.edu/stereo/data/scenes2003/newdata/cones/im6.png")

    # Estimate depth and colorize it
    for i in range(10):
        disparity_map = depth_estimator(left_img, right_img)
    color_disparity = depth_estimator.draw_disparity()
    combined_img = np.hstack((left_img, color_disparity))

    cv2.namedWindow("Estimated disparity", cv2.WINDOW_NORMAL)
    cv2.imshow("Estimated disparity", combined_img)
    cv2.waitKey(0)
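The has_flow check in initialize_model relies on the combined model exposing four inputs (a half-resolution image pair used to compute flow_init plus the full-resolution pair), while the init model exposes only two. A quick way to confirm this for a given ONNX file (the filename below just follows the naming pattern from video_depth_estimation.py; adjust it to your own model):

import onnxruntime

# Inspect the model inputs; the combined model should list 4, the init model 2.
sess = onnxruntime.InferenceSession("models/crestereo_combined_iter5_320x480.onnx",
                                    providers=["CPUExecutionProvider"])
for inp in sess.get_inputs():
    print(inp.name, inp.shape)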
I don't know much about TensorRT. Is this the direct use of the ONNX model for inference?
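If it helps: the scripts above run the ONNX model through onnxruntime. Because the provider list includes TensorrtExecutionProvider (with trt_engine_cache_enable), onnxruntime will build and cache a TensorRT engine internally when TensorRT is available, and otherwise fall back to CUDA or CPU. A small sketch to check which providers are actually active (the model path is a placeholder):

import onnxruntime

sess = onnxruntime.InferenceSession("models/crestereo_combined_iter5_320x480.onnx",
                                    providers=["TensorrtExecutionProvider",
                                               "CUDAExecutionProvider",
                                               "CPUExecutionProvider"])
print(onnxruntime.get_available_providers())  # providers compiled into this onnxruntime build
print(sess.get_providers())                   # providers the session actually uses, in priority order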
@PINTO0309 Thanks bro! I solved it.

> @PINTO0309 Thanks bro! I solved it.

I have the same problem, how did you solve it?
Issue Type
Others
OS
Ubuntu
OS architecture
x86_64
Programming Language
C++
Framework
TensorRT
Model name and Weights/Checkpoints URL
CREStereo ONNX combined model
Description
When I use onnx-tensorrt to convert the combined model, I get the following error:
I see here that you successfully converted the model; could you help me a little?
Relevant Log Output
No response
URL or source code for simple inference testing code
No response
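Regarding the conversion error mentioned in the Description (the actual error output is not shown above, so this is only a general suggestion): one way to get a more detailed message than onnx-tensorrt sometimes prints is to parse the model with TensorRT's Python OnnxParser and dump the parser errors. A minimal sketch, assuming TensorRT 8.x and a placeholder model filename:

import tensorrt as trt

# Parse the combined ONNX model with TensorRT's OnnxParser and print any errors.
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open("crestereo_combined_iter10_360x640.onnx", "rb") as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))   # each entry reports the failing node and reason

If you only need the engine built once and cached, the onnxruntime route used in crestereo.py (TensorrtExecutionProvider with trt_engine_cache_enable) is another option that avoids an explicit conversion step.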