fabio-sim / Depth-Anything-ONNX

ONNX-compatible Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data
Apache License 2.0

Do you have an example depth_to_pointcloud.py with ONNX #19

Closed Siwakonrome closed 1 week ago

Siwakonrome commented 1 month ago

Do you have an example depth_to_pointcloud.py (Depth Anything V2) that works with the ONNX models?

Thank you.

fabio-sim commented 1 month ago

Hi @Siwakonrome, thank you for your interest in Depth-Anything-ONNX.

If I understand the script correctly, inference is the same up to this line:

https://github.com/DepthAnything/Depth-Anything-V2/blob/31dc97708961675ce6b3a8d8ffa729170a4aa273/metric_depth/depth_to_pointcloud.py#L93

        pred = depth_anything.infer_image(image, height)

In the ONNX inference, the above line corresponds to:

https://github.com/fabio-sim/Depth-Anything-ONNX/blob/3128cb99056785cc843ad6deda2f53f2e2ff4272/dynamo.py#L238

This means that you only need to adapt the following lines:

https://github.com/DepthAnything/Depth-Anything-V2/blob/31dc97708961675ce6b3a8d8ffa729170a4aa273/metric_depth/depth_to_pointcloud.py#L96-L110

        # Resize depth prediction to match the original image size
        resized_pred = Image.fromarray(pred).resize((width, height), Image.NEAREST)

        # Generate mesh grid and calculate point cloud coordinates
        x, y = np.meshgrid(np.arange(width), np.arange(height))
        x = (x - width / 2) / args.focal_length_x
        y = (y - height / 2) / args.focal_length_y
        z = np.array(resized_pred)
        points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
        colors = np.array(color_image).reshape(-1, 3) / 255.0

        # Create the point cloud and save it to the output directory
        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(points)
        pcd.colors = o3d.utility.Vector3dVector(colors)
        o3d.io.write_point_cloud(os.path.join(args.outdir, os.path.splitext(os.path.basename(filename))[0] + ".ply"), pcd)
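
Putting this together, the adapted ONNX version would look roughly like the sketch below. This is only a sketch: I'm assuming a metric model exported with a fixed 518x518 input and a (1, 518, 518) depth output, and the model path and focal lengths are placeholders you'd replace with your own.

import cv2
import numpy as np
import onnxruntime as ort
import open3d as o3d
from PIL import Image

focal_length_x = 470.4  # placeholder intrinsics; use your camera's calibration
focal_length_y = 470.4

image = cv2.cvtColor(cv2.imread("input.png"), cv2.COLOR_BGR2RGB)
height, width = image.shape[:2]

# Preprocess to the fixed 518x518 input the exported model expects
tensor = cv2.resize(image, (518, 518), interpolation=cv2.INTER_CUBIC) / 255.0
tensor = (tensor - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
tensor = tensor.transpose(2, 0, 1)[None].astype("float32")

session = ort.InferenceSession("depth_anything_v2_metric.onnx")  # placeholder path
pred = session.run(None, {session.get_inputs()[0].name: tensor})[0][0]

# Resize depth prediction to match the original image size
resized_pred = Image.fromarray(pred).resize((width, height), Image.NEAREST)

# Generate mesh grid and calculate point cloud coordinates
x, y = np.meshgrid(np.arange(width), np.arange(height))
x = (x - width / 2) / focal_length_x
y = (y - height / 2) / focal_length_y
z = np.array(resized_pred)
points = np.stack((x * z, y * z, z), axis=-1).reshape(-1, 3)
colors = image.reshape(-1, 3) / 255.0

# Create the point cloud and save it
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
pcd.colors = o3d.utility.Vector3dVector(colors)
o3d.io.write_point_cloud("output.ply", pcd)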

Siwakonrome commented 1 month ago

@fabio-sim Thank you for the information.

I tried to export an ONNX model from metric_depth, analogous to depth_anything_v2_vits_dynamic.onnx, using the metric checkpoint depth_anything_v2_vits.pth.

Export code:

import torch
from metric_depth.depth_anything_v2.dpt import DepthAnythingV2

model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}

batch_size = 1
encoder_name = 'vits'
output_onnx = '/content/test0.onnx'
model_path = '/content/depth_anything_v2_vits.pth'

def export():
    """Export Depth-Anything V2 (metric) using TorchDynamo (opset 18)."""
    model = DepthAnythingV2(**{**model_configs[encoder_name], 'max_depth': 20})
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    # Export traced against a fixed 518x518 example input
    onnx_program = torch.onnx.dynamo_export(
        model, torch.randn(batch_size, 3, 518, 518)
    )
    onnx_program.save(output_onnx)

export()

It works when I resize the input image (image = cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC)). But when I remove that line, it returns:

RuntimeError: Error in execution: Got invalid dimensions for input: l_x_ for the following indices
 index: 2 Got: 1544 Expected: 518
 index: 3 Got: 2048 Expected: 518
 Please fix either the inputs/outputs or the model.

Inference code:

import cv2
from PIL import Image
import onnxruntime as ort
import numpy as np
import open3d as o3d
import time

'''
Basler 2K
'''
ppx = 8.709002501455901 * 10**2 # Principal point x in pixels.
ppy = 5.878986862144450 * 10**2 # Principal point y in pixels.
focal_length_x_pixel = 1.017288703871180 * 10**4 # Focal length x in pixels.
focal_length_y_pixel = 1.018932963024356 * 10**4 # Focal length y in pixels.

def infer():
    """Depth-Anything V2 inference using ONNXRuntime. No dependency on PyTorch."""

    image_path = r'C:\FiboWork\bgc_glass_gob_inspection\depth_anything_v2\images\29.png'
    model_path = r'C:\FiboWork\bgc_glass_gob_inspection\depth_anything_v2\models\test.onnx'
    width, height = 518, 518

    image = cv2.imread(str(image_path))
    color_image = Image.open(image_path).convert('RGB')
    h, w = image.shape[:2]
    print(h, w)

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
    image = cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC) # fixed 518x518 resize; want a dynamic input shape instead
    image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
    image = image.transpose(2, 0, 1)[None].astype("float32")

    # Inference
    sess_options = ort.SessionOptions()
    sess_options.enable_profiling = False

    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]

    session = ort.InferenceSession(
        model_path, sess_options=sess_options, providers=providers
    )
    binding = session.io_binding()
    ort_input = session.get_inputs()[0].name
    binding.bind_cpu_input(ort_input, image)
    ort_output = session.get_outputs()[0].name
    binding.bind_output(ort_output, "cuda")
    # Actual inference happens here.
    session.run_with_iobinding(binding)  

    t0 = time.time()
    pred = binding.get_outputs()[0].numpy()[0]
    resized_pred = Image.fromarray(pred).resize((w, h), Image.NEAREST)
    print(f'Copy output + resize: {time.time() - t0}')

    '''
    Post Processing
    Generate mesh grid and calculate point cloud coordinates
    '''
    t0 = time.time()
    x, y = np.meshgrid(np.arange(w), np.arange(h))
    x = (x - ppx) / focal_length_x_pixel # use the calibrated principal point, not the 518x518 model input size
    y = (y - ppy) / focal_length_y_pixel
    z = np.array(resized_pred) # * 59.717
    points = np.stack((np.multiply(x, z), np.multiply(y, z), z), axis=-1).reshape(-1, 3)
    print(f'Post Processing: {time.time() - t0}')
    '''
    Create the point cloud and save it to the output directory
    '''
    colors = np.array(color_image).reshape(-1, 3) / 255.0
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    bounding_box = o3d.geometry.AxisAlignedBoundingBox(
        min_bound=np.array([-200.0, -200.0, 1160.0]),
        max_bound=np.array([200.0, 200.0, 1500.0]),
    )
    cropped_pcd = pcd.crop(bounding_box)
    obb = cropped_pcd.get_oriented_bounding_box() 
    obb.color = (1, 0, 0)
    dimension = obb.extent
    dimension_text = f"Dimensions (L x W) in mm: {dimension[0]:.2f} x {dimension[1]:.2f}"
    print(dimension_text)
    o3d.visualization.draw_geometries([cropped_pcd, obb])

infer()

How can I export an ONNX model that accepts a dynamic input image shape? What did I do wrong? Thank you.

fabio-sim commented 1 month ago

For dynamic shapes, you can try this command:

python dynamo.py export -b 0 -h 0 -w 0 --opset 17
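
Passing 0 for the batch, height, and width marks those axes as dynamic. The error you saw happens because torch.onnx.dynamo_export traces the example input's shape as static by default, so your exported graph only accepts 518x518.

Note that even with a dynamic model, the ViT backbone expects the spatial dimensions to be multiples of its patch size (14). A minimal inference sketch (assuming the dynamic export is named depth_anything_v2_vits_dynamic.onnx):

import cv2
import numpy as np
import onnxruntime as ort

image = cv2.cvtColor(cv2.imread("input.png"), cv2.COLOR_BGR2RGB)
h, w = image.shape[:2]

# Snap both sides to the nearest multiple of the patch size (14)
new_h = max(round(h / 14), 1) * 14
new_w = max(round(w / 14), 1) * 14
tensor = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_CUBIC) / 255.0
tensor = (tensor - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
tensor = tensor.transpose(2, 0, 1)[None].astype("float32")

session = ort.InferenceSession("depth_anything_v2_vits_dynamic.onnx")
pred = session.run(None, {session.get_inputs()[0].name: tensor})[0][0]
print(pred.shape)  # (new_h, new_w); resize back to (h, w) before the point-cloud step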