dusty-nv / jetson-inference

Hello AI World guide to deploying deep-learning inference networks and deep vision primitives with TensorRT and NVIDIA Jetson.
https://developer.nvidia.com/embedded/twodaystoademo
MIT License
7.59k stars 2.94k forks

Getting real world 3D coordinates from depth map and image coordinates #1676

Closed: dige-mothership closed this issue 1 year ago

dige-mothership commented 1 year ago

Hi @dusty-nv, I was wondering if you have a function to derive the (x, y, z) real-world 3D coordinates from the depth map and image coordinates in Python? Or an explanation of how to do it?

dige-mothership commented 1 year ago

I think what I was looking for is how to convert depth maps to point clouds. I'll do some googling, but any insight you might offer would be great.

dusty-nv commented 1 year ago

@dige-mothership I don't have it in Python, but you can find my C++ code for converting the depth map to 3D point cloud coordinates (given the camera's intrinsic calibration coefficients) here:

https://github.com/dusty-nv/jetson-inference/blob/c6602dd46fd9a5fd46934db8933cb54b18665bae/c/depthNet.cpp#L510
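The C++ code linked above back-projects each pixel through the pinhole camera model: X = (u - cx) * Z / fx and Y = (v - cy) * Z / fy, with Z taken from the depth map. The same math in Python/NumPy might look like this (the function and variable names here are my own for illustration, not part of jetson-inference):

```python
import numpy as np

def depth_to_points(depth, fx, fy, cx, cy):
    """Back-project an HxW depth map (in meters) into an Nx3 array of
    (X, Y, Z) camera-space points using pinhole intrinsics."""
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))  # per-pixel image coordinates
    z = depth
    x = (u - cx) * z / fx   # X = (u - cx) * Z / fx
    y = (v - cy) * z / fy   # Y = (v - cy) * Z / fy
    return np.stack([x, y, z], axis=-1).reshape(-1, 3)

# example: a flat surface 2 m away, seen through a toy 4x4 camera
pts = depth_to_points(np.full((4, 4), 2.0), fx=500.0, fy=500.0, cx=2.0, cy=2.0)
```

The intrinsics (fx, fy, cx, cy) come from your camera calibration; values made up here.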

dusty-nv commented 1 year ago

Also I have the cudaPointCloud class from jetson-utils that does the point cloud extraction in CUDA: https://github.com/dusty-nv/jetson-utils/blob/f0bff5c502f9ac6b10aa2912f1324797df94bc2d/cuda/cudaPointCloud.h

There aren't Python bindings for it, though.

dige-mothership commented 1 year ago

@dusty-nv thank you, I'll take a look

dige-mothership commented 12 months ago

Hi @dusty-nv, I am trying to save the point cloud information here, but I'm not sure what I'm doing wrong:

```cpp
#include "videoSource.h"
#include "videoOutput.h"

#include "cudaOverlay.h"
#include "cudaMappedMemory.h"

#include "depthNet.h"

#include <signal.h>

bool signal_recieved = false;

void sig_handler( int signo )
{
    if( signo == SIGINT )
    {
        printf("received SIGINT\n");
        signal_recieved = true;
    }
}

int usage()
{
    printf("usage: depthnet [--help] [--network NETWORK]\n");
    printf("                [--colormap COLORMAP] [--filter-mode MODE]\n");
    printf("                [--visualize VISUAL] [--depth-size SIZE]\n");
    printf("                input_URI [output_URI]\n\n");
    printf("Mono depth estimation on a video/image stream using depthNet DNN.\n\n");
    printf("See below for additional arguments that may not be shown above.\n\n");
    printf("optional arguments:\n");
    printf("  --help               show this help message and exit\n");
    printf("  --network=NETWORK    pre-trained model to load (see below for options)\n");
    printf("  --visualize=VISUAL   controls what is displayed (e.g. --visualize=input,depth)\n");
    printf("                       valid combinations are: 'input', 'depth' (comma-separated)\n");
    printf("  --depth-size=SIZE    scales the size of the depth map visualization, as a\n");
    printf("                       percentage of the input size (default is 1.0)\n");
    printf("  --filter-mode=MODE   filtering mode used during visualization,\n");
    printf("                       options are: 'point' or 'linear' (default: 'linear')\n");
    printf("  --colormap=COLORMAP  depth colormap (default is 'viridis-inverted')\n");
    printf("                       options are: 'inferno', 'inferno-inverted',\n");
    printf("                                    'magma', 'magma-inverted',\n");
    printf("                                    'parula', 'parula-inverted',\n");
    printf("                                    'plasma', 'plasma-inverted',\n");
    printf("                                    'turbo', 'turbo-inverted',\n");
    printf("                                    'viridis', 'viridis-inverted'\n\n");
    printf("positional arguments:\n");
    printf("    input_URI          resource URI of input stream  (see videoSource below)\n");
    printf("    output_URI         resource URI of output stream (see videoOutput below)\n\n");

    printf("%s", depthNet::Usage());
    printf("%s", videoSource::Usage());
    printf("%s", videoOutput::Usage());
    printf("%s", Log::Usage());

    return 0;
}

//
// depth map buffers
//
typedef float4 pixelType;        // this can be uchar3, uchar4, float3, float4

pixelType* imgDepth     = NULL;  // colorized depth map image
pixelType* imgComposite = NULL;  // original image with depth map next to it

int2 inputSize;
int2 depthSize;
int2 compositeSize;

// initializing the camera intrinsics
const char* pointCloudFile = "point cloud";
float2 focalLength    = make_float2(70, 200);
float2 principalPoint = make_float2(720, 360);

// allocate depth map & output buffers
bool allocBuffers( int width, int height, uint32_t flags, float depthScale )
{
    // check if the buffers were already allocated for this size
    if( imgDepth != NULL && width == inputSize.x && height == inputSize.y )
        return true;

    // free previous buffers if they exist
    CUDA_FREE_HOST(imgDepth);
    CUDA_FREE_HOST(imgComposite);

    // allocate depth map
    inputSize = make_int2(width, height);
    depthSize = make_int2(width * depthScale, height * depthScale);

    if( !cudaAllocMapped(&imgDepth, depthSize) )
    {
        LogError("depthnet:  failed to allocate CUDA memory for depth map (%ix%i)\n", depthSize.x, depthSize.y);
        return false;
    }

    // allocate composite image
    compositeSize = make_int2(0,0);

    if( flags & depthNet::VISUALIZE_DEPTH )
    {
        compositeSize.x += depthSize.x;
        compositeSize.y = depthSize.y;
    }

    if( flags & depthNet::VISUALIZE_INPUT )
    {
        compositeSize.x += inputSize.x;
        compositeSize.y = inputSize.y;
    }

    if( !cudaAllocMapped(&imgComposite, compositeSize) )
    {
        LogError("depthnet:  failed to allocate CUDA memory for composite image (%ix%i)\n", compositeSize.x, compositeSize.y);
        return false;
    }

    return true;
}

int main( int argc, char** argv )
{
    // ... (rest of the post was truncated)
```
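For what it's worth, depthNet::SavePointCloud() writes the extracted points to a PCD file. If you end up doing the extraction on the Python side instead, the same ASCII PCD format can be written by hand. A minimal sketch (my own code, not the library's implementation; assumes XYZ-only points with no color):

```python
import numpy as np

def save_pcd(filename, points):
    """Write an Nx3 float array as an ASCII PCD v0.7 point cloud."""
    n = len(points)
    header = (
        "# .PCD v0.7 - Point Cloud Data file format\n"
        "VERSION 0.7\n"
        "FIELDS x y z\n"
        "SIZE 4 4 4\n"
        "TYPE F F F\n"
        "COUNT 1 1 1\n"
        f"WIDTH {n}\n"
        "HEIGHT 1\n"           # unorganized cloud: WIDTH = point count, HEIGHT = 1
        "VIEWPOINT 0 0 0 1 0 0 0\n"
        f"POINTS {n}\n"
        "DATA ascii\n"
    )
    with open(filename, "w") as f:
        f.write(header)
        for x, y, z in points:
            f.write(f"{x} {y} {z}\n")

# two example points in camera space (meters)
save_pcd("cloud.pcd", np.array([[0.0, 0.0, 2.0], [0.1, -0.1, 2.5]]))
```

The resulting file should open in pcl_viewer or anything else that reads PCD.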

dige-mothership commented 12 months ago

Here is the error I get:

```
/home/agent/jetson-inference/examples/depthnet/depthnet.cpp: In function ‘int main(int, char**)’:
/home/agent/jetson-inference/examples/depthnet/depthnet.cpp:251:114: error: no matching function for call to ‘depthNet::SavePointCloud(const char*&, pixelType*&, uint32_t, uint32_t, float2&, float2&)’
     net->SavePointCloud(pointCloudFile, imgInput, input->GetWidth(), input->GetHeight(), focalLength, principalPoint);
In file included from /home/agent/jetson-inference/examples/depthnet/depthnet.cpp:29:0:
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:230:7: note: candidate: bool depthNet::SavePointCloud(const char*)
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:230:7: note:   candidate expects 1 argument, 6 provided
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:236:7: note: candidate: bool depthNet::SavePointCloud(const char*, float*, uint32_t, uint32_t)
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:236:7: note:   candidate expects 4 arguments, 6 provided
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:242:7: note: candidate: bool depthNet::SavePointCloud(const char*, float*, uint32_t, uint32_t, const float2&, const float2&)
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:242:7: note:   no known conversion for argument 2 from ‘pixelType* {aka float4*}’ to ‘float*’
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:249:7: note: candidate: bool depthNet::SavePointCloud(const char*, float*, uint32_t, uint32_t, const float (*)[3])
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:249:7: note:   candidate expects 5 arguments, 6 provided
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:256:7: note: candidate: bool depthNet::SavePointCloud(const char*, float*, uint32_t, uint32_t, const char*)
/home/agent/jetson-inference/build/x86_64/include/jetson-inference/depthNet.h:256:7: note:   candidate expects 5 arguments, 6 provided
[ 82%] Built target segnet
make[2]: *** [examples/depthnet/CMakeFiles/depthnet.dir/build.make:76: examples/depthnet/CMakeFiles/depthnet.dir/depthnet.cpp.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:474: examples/depthnet/CMakeFiles/depthnet.dir/all] Error 2
```

dusty-nv commented 12 months ago

@dige-mothership try casting your img pointer to float* like this:

```cpp
net->SavePointCloud(pointCloudFile, (float*)imgInput, input->GetWidth(), input->GetHeight(), focalLength, principalPoint);
```

dige-mothership commented 12 months ago

@dusty-nv Solved!!! Thanks a million.

dige-mothership commented 12 months ago

@dusty-nv one more thing, I've been getting this error:

[TRT] depthNet::SavePointCloud() -- failed to upsample depth field

and I see it's from here:

```cpp
if( !Visualize((void*)depthField, width, height, IMAGE_GRAY32F, COLORMAP_NONE, FILTER_LINEAR) )
{
    printf(LOG_TRT "depthNet::SavePointCloud() -- failed to upsample depth field\n");
    return false;
}
```

Can you think of any reason why I may be getting it?
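For context, the snippet above shows that SavePointCloud() first runs the raw depth field through Visualize() with FILTER_LINEAR to upsample it before back-projecting, since the network's depth output is typically smaller than the input image. If that step were ported to Python, a plain bilinear resize of the depth array would play the same role; a sketch under that assumption (function name is mine):

```python
import numpy as np

def upsample_bilinear(depth, out_h, out_w):
    """Bilinearly resize a small HxW depth map to (out_h, out_w),
    analogous to the FILTER_LINEAR upsampling in SavePointCloud()."""
    h, w = depth.shape
    # fractional sample positions in the source grid
    ys = np.linspace(0, h - 1, out_h)
    xs = np.linspace(0, w - 1, out_w)
    y0 = np.floor(ys).astype(int)
    y1 = np.minimum(y0 + 1, h - 1)
    x0 = np.floor(xs).astype(int)
    x1 = np.minimum(x0 + 1, w - 1)
    wy = (ys - y0)[:, None]   # vertical interpolation weights
    wx = (xs - x0)[None, :]   # horizontal interpolation weights
    top = depth[y0][:, x0] * (1 - wx) + depth[y0][:, x1] * wx
    bot = depth[y1][:, x0] * (1 - wx) + depth[y1][:, x1] * wx
    return top * (1 - wy) + bot * wy

small = np.array([[1.0, 2.0], [3.0, 4.0]])
big = upsample_bilinear(small, 3, 3)   # 2x2 depth map -> 3x3
```

The corners of the output match the corners of the input, and the center is the average of all four values.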

dusty-nv commented 12 months ago

@dige-mothership it's been a long time since I used that code so I'm not sure, but were there any other errors printed out?

dige-mothership commented 12 months ago

@dusty-nv This is all of it:

```
[TRT] depthNet::Visualize() -- unsupported image format (gray32f)
[TRT]    supported formats are:
[TRT]       * rgb8
[TRT]       * rgba8
[TRT]       * rgb32f
[TRT]       * rgba32f
[TRT] depthNet::SavePointCloud() -- failed to upsample depth field
```

dige-mothership commented 12 months ago

I wonder if the problem is the image type being passed to depthNet::SavePointCloud().

dusty-nv commented 12 months ago

@dige-mothership try commenting out this if statement:

https://github.com/dusty-nv/jetson-inference/blob/a444f0c987e5584ad21616737fe543cf7fb794d9/c/depthNet.cpp#L408

Then recompile/reinstall. I traced it through and it appears that it should work; that `if` check was added later.

Also there is https://github.com/dusty-nv/jetson-inference/tree/master/tools/depth-viewer if you haven't tried running that yet

dige-mothership commented 12 months ago

@dusty-nv perfect, I have successfully extracted the point cloud, thank you.