sunmooncode closed this issue 2 years ago
video_depth_estimation.py
import cv2
import pafy
import numpy as np
import glob
from crestereo import CREStereo, CameraConfig
# Initialize video
# cap = cv2.VideoCapture("video.mp4")
videoUrl = 'https://youtu.be/Yui48w71SG0'
start_time = 0 # skip first {start_time} seconds
videoPafy = pafy.new(videoUrl)
print(videoPafy.streams)
cap = cv2.VideoCapture(videoPafy.streams[-1].url)
# cap.set(cv2.CAP_PROP_POS_FRAMES, start_time*30)
# Model options (not all options supported together)
iters = 5 # Fewer iterations run faster but produce less detail.
# Options: 2, 5, 10, 20
input_shape = (320, 480) # Input resolution.
# Options: (240,320), (320,480), (380, 480), (360, 640), (480,640), (720, 1280)
version = "combined" # The combined version does 2 passes, one to get an initial estimation and a second one to refine it.
# Options: "init", "combined"
# Camera options: baseline (m), focal length (pixels) and max distance
# TODO: Replace these values with the correct configuration for YOUR camera (see the worked example after this script)
camera_config = CameraConfig(0.12, 0.5*input_shape[1]/0.72)
max_distance = 10
# Initialize model
model_path = f'models/crestereo_{version}_iter{iters}_{input_shape[0]}x{input_shape[1]}.onnx'
depth_estimator = CREStereo(model_path, camera_config=camera_config, max_dist=max_distance)
cv2.namedWindow("Estimated depth", cv2.WINDOW_AUTOSIZE)
while cap.isOpened():

    try:
        # Read frame from the video
        ret, frame = cap.read()
        if not ret:
            break
    except:
        continue

    # Extract the left and right images
    left_img = frame[:, :frame.shape[1]//3]
    right_img = frame[:, frame.shape[1]//3:frame.shape[1]*2//3]
    color_real_depth = frame[:, frame.shape[1]*2//3:]

    # Estimate the depth
    disparity_map = depth_estimator(left_img, right_img)
    color_depth = depth_estimator.draw_depth()

    combined_image = np.hstack((left_img, color_real_depth, color_depth))

    cv2.imshow("Estimated depth", combined_image)

    # Press key q to stop
    if cv2.waitKey(1) == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
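For reference, CameraConfig takes the stereo baseline in meters and the focal length in pixels, and crestereo.py converts disparity to depth as depth = f * baseline / disparity. A quick sanity check of that formula with the numbers used above (illustrative values only, not a real calibration for any particular camera):

# Hypothetical sanity check of the depth formula used in crestereo.py,
# using the illustrative values from this script (not a real calibration).
baseline = 0.12                      # meters
f = 0.5 * 480 / 0.72                 # ~333 pixels for a 480-px-wide input
disparity = 20                       # pixels
depth = f * baseline / disparity     # ~2.0 meters
print(f"depth ~ {depth:.2f} m")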
crestereo.py
from dataclasses import dataclass
import cv2
import numpy as np
import onnxruntime
import os
from performance_monitor import *
@dataclass
class CameraConfig:
    baseline: float
    f: float

DEFAULT_CONFIG = CameraConfig(0.546, 120)  # rough estimate from the original calibration
class CREStereo():

    def __init__(self, model_path, camera_config=DEFAULT_CONFIG, max_dist=10):
        self.initialize_model(model_path, camera_config, max_dist)

    def __call__(self, left_img, right_img):
        return self.update(left_img, right_img)

    def initialize_model(self, model_path, camera_config=DEFAULT_CONFIG, max_dist=10):
        self.camera_config = camera_config
        self.max_dist = max_dist

        # Initialize model session
        self.session = onnxruntime.InferenceSession(
            model_path, providers=[
                (
                    'TensorrtExecutionProvider', {
                        'trt_engine_cache_enable': True,
                        'trt_engine_cache_path': os.path.dirname(model_path),
                        'trt_fp16_enable': True,
                    }
                ),
                'CUDAExecutionProvider',
                'CPUExecutionProvider'
            ]
        )

        # Get model info
        self.get_input_details()
        self.get_output_details()

        # Check if the model has init flow
        self.has_flow = len(self.input_names) > 2
    def update(self, left_img, right_img):
        self.img_height, self.img_width = left_img.shape[:2]

        left_tensor = self.prepare_input(left_img)
        right_tensor = self.prepare_input(right_img)

        # Get the half resolution to calculate flow_init
        if self.has_flow:
            left_tensor_half = self.prepare_input(left_img, half=True)
            right_tensor_half = self.prepare_input(right_img, half=True)

            # Estimate the disparity map
            outputs = self.inference_with_flow(left_tensor_half, right_tensor_half,
                                               left_tensor, right_tensor)
        else:
            # Estimate the disparity map
            outputs = self.inference_without_flow(left_tensor, right_tensor)

        self.disparity_map = self.process_output(outputs)

        # Estimate depth map from the disparity
        self.depth_map = self.get_depth_from_disparity(self.disparity_map, self.camera_config)

        return self.disparity_map

    def prepare_input(self, img, half=False):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if half:
            img_input = cv2.resize(img, (self.input_width//2, self.input_height//2))
        else:
            img_input = cv2.resize(img, (self.input_width, self.input_height))

        img_input = img_input.transpose(2, 0, 1)
        img_input = img_input[np.newaxis, :, :, :]

        return img_input.astype(np.float32)
    @performance
    def inference_without_flow(self, left_tensor, right_tensor):
        return self.session.run(self.output_names, {self.input_names[0]: left_tensor,
                                                    self.input_names[1]: right_tensor})[0]

    @performance
    def inference_with_flow(self, left_tensor_half, right_tensor_half, left_tensor, right_tensor):
        return self.session.run(self.output_names, {self.input_names[0]: left_tensor_half,
                                                    self.input_names[1]: right_tensor_half,
                                                    self.input_names[2]: left_tensor,
                                                    self.input_names[3]: right_tensor})[0]

    def process_output(self, output):
        return np.squeeze(output[:, 0, :, :])

    @staticmethod
    def get_depth_from_disparity(disparity_map, camera_config):
        return camera_config.f*camera_config.baseline/disparity_map
    def draw_disparity(self):
        disparity_map = cv2.resize(self.disparity_map, (self.img_width, self.img_height))
        norm_disparity_map = 255*((disparity_map-np.min(disparity_map))/
                                  (np.max(disparity_map)-np.min(disparity_map)))
        return cv2.applyColorMap(cv2.convertScaleAbs(norm_disparity_map, 1), cv2.COLORMAP_MAGMA)

    def draw_depth(self):
        return self.util_draw_depth(self.depth_map, (self.img_width, self.img_height), self.max_dist)

    @staticmethod
    def util_draw_depth(depth_map, img_shape, max_dist):
        norm_depth_map = 255*(1-depth_map/max_dist)
        norm_depth_map[norm_depth_map < 0] = 0
        norm_depth_map[norm_depth_map >= 255] = 0
        norm_depth_map = cv2.resize(norm_depth_map, img_shape)
        return cv2.applyColorMap(cv2.convertScaleAbs(norm_depth_map, 1), cv2.COLORMAP_MAGMA)
    def get_input_details(self):
        model_inputs = self.session.get_inputs()
        self.input_names = [model_inputs[i].name for i in range(len(model_inputs))]
        self.input_shape = model_inputs[-1].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def get_output_details(self):
        model_outputs = self.session.get_outputs()
        self.output_names = [model_outputs[i].name for i in range(len(model_outputs))]
        self.output_shape = model_outputs[0].shape
if __name__ == '__main__':
    from imread_from_url import imread_from_url

    # Initialize model
    model_path = '../models/crestereo_combined_iter10_360x640.onnx'
    depth_estimator = CREStereo(model_path)

    # Load images
    left_img = imread_from_url("https://vision.middlebury.edu/stereo/data/scenes2003/newdata/cones/im2.png")
    right_img = imread_from_url("https://vision.middlebury.edu/stereo/data/scenes2003/newdata/cones/im6.png")

    # Estimate depth and colorize it
    for i in range(10):
        disparity_map = depth_estimator(left_img, right_img)
    color_disparity = depth_estimator.draw_disparity()
    combined_img = np.hstack((left_img, color_disparity))

    cv2.namedWindow("Estimated disparity", cv2.WINDOW_NORMAL)
    cv2.imshow("Estimated disparity", combined_img)
    cv2.waitKey(0)
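The has_flow check in initialize_model relies on the combined model exposing four inputs (a half-resolution image pair used to compute flow_init plus the full-resolution pair), while the init model exposes only two. A quick way to confirm this for a given ONNX file (the filename below just follows the naming pattern from video_depth_estimation.py; adjust it to your own model):

import onnxruntime

# Inspect the model inputs; the combined model should list 4, the init model 2.
sess = onnxruntime.InferenceSession("models/crestereo_combined_iter5_320x480.onnx",
                                    providers=["CPUExecutionProvider"])
for inp in sess.get_inputs():
    print(inp.name, inp.shape)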
I don't know much about TensorRT. Is this the direct use of the ONNX model for inference?
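If it helps: the scripts above run the ONNX model through onnxruntime. Because the provider list includes TensorrtExecutionProvider (with trt_engine_cache_enable), onnxruntime will build and cache a TensorRT engine internally when TensorRT is available, and otherwise fall back to CUDA or CPU. A small sketch to check which providers are actually active (the model path is a placeholder):

import onnxruntime

sess = onnxruntime.InferenceSession("models/crestereo_combined_iter5_320x480.onnx",
                                    providers=["TensorrtExecutionProvider",
                                               "CUDAExecutionProvider",
                                               "CPUExecutionProvider"])
print(onnxruntime.get_available_providers())  # providers compiled into this onnxruntime build
print(sess.get_providers())                   # providers the session actually uses, in priority order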
@PINTO0309 Thanks bro! I solved it.

> @PINTO0309 Thanks bro! I solved it.

I have the same problem, how did you solve it?
Issue Type
Others
OS
Ubuntu
OS architecture
x86_64
Programming Language
C++
Framework
TensorRT
Model name and Weights/Checkpoints URL
CREStereo ONNX combined model
Description
When I use onnx-tensorrt to convert the combined model, I get the following error:
I see here that you successfully converted the model; could you help me a little?
Relevant Log Output
No response
URL or source code for simple inference testing code
No response
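Regarding the conversion error mentioned in the Description (the actual error output is not shown above, so this is only a general suggestion): one way to get a more detailed message than onnx-tensorrt sometimes prints is to parse the model with TensorRT's Python OnnxParser and dump the parser errors. A minimal sketch, assuming TensorRT 8.x and a placeholder model filename:

import tensorrt as trt

# Parse the combined ONNX model with TensorRT's OnnxParser and print any errors.
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open("crestereo_combined_iter10_360x640.onnx", "rb") as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))   # each entry reports the failing node and reason

If you only need the engine built once and cached, the onnxruntime route used in crestereo.py (TensorrtExecutionProvider with trt_engine_cache_enable) is another option that avoids an explicit conversion step.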