microsoft / Azure-Kinect-Sensor-SDK

A cross platform (Linux and Windows) user mode SDK to read data from your Azure Kinect device.
https://Azure.com/Kinect
MIT License
1.47k stars 613 forks source link

IR image to color camera transformation changes pixel values #1972

Open KaboomHBR opened 4 months ago

KaboomHBR commented 4 months ago

Hi there,

I am following this thread in an attempt to transform the IR image so that it has the same geometry as the color image. The code runs fine. However, I noticed that the output image does not have the same appearance as the original image: originally the maximum pixel value of the image was 65535, whilst after the transformation it becomes 14902. Visually, the new image is much duller, and I noticed that the original maximum values become "invalid" and the "invalid_custom_value" is used instead (in this case it's 0, so the original brightest point is now completely dark). I would really appreciate some help, thanks!!

ir_image_custom_2 ir_image_transformed_2

Below is my .cpp code:

#include <sys/stat.h>

#include <cerrno>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>

#include <k4a/k4a.hpp>
#include <k4arecord/playback.hpp>
#include <opencv2/opencv.hpp>

/// Writes `image` to `filename` with cv::imwrite.
///
/// A failed write is reported on stderr; the function itself returns nothing,
/// so callers are not interrupted by a single image that could not be stored.
void saveImage(const std::string& filename, const cv::Mat& image) {
    // cv::imwrite reports failure through its bool result; surface it instead of dropping it.
    if (!cv::imwrite(filename, image)) {
        std::cerr << "Failed to write image: " << filename << std::endl;
    }
}

/// Returns true only when `dir` names an existing directory.
///
/// A path that exists but is not a directory (regular file, device node, ...)
/// yields false, as does any path that stat() cannot resolve.
bool directory_exists(const std::string& dir){
    struct stat info;
    if (stat(dir.c_str(), &info) != 0){
        return false;
    }
    // stat() succeeding only proves the path exists; check the file type as well.
    return S_ISDIR(info.st_mode);
}

/// Creates the directory `dir` with mode rwxrwxr-x (as requested from mkdir()).
///
/// On failure the offending path and the system error text are written to
/// stderr and the process terminates with exit status 1.
void create_directory(const std::string& dir){
    if (mkdir(dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) == -1){
        // Capture errno right away: the stream insertions below may overwrite it.
        const int saved_errno = errno;
        std::cerr << "Error creating directory '" << dir << "': "
                  << std::strerror(saved_errno) << std::endl;
        std::exit(1);
    }
}

/// Builds a transformation handle whose color camera is downscaled by a factor of 2.
///
/// The color camera's resolution (width, height) and its intrinsics cx, cy, fx, fy
/// are each divided by 2; every other calibration field is carried over unchanged.
/// Approach taken from https://github.com/microsoft/Azure-Kinect-Sensor-SDK/issues/762
///
/// @param calibration  Calibration to start from; it is not modified.
/// @return A k4a::transformation constructed from the downscaled calibration.
k4a::transformation downscale_transformation_2(const k4a::calibration calibration){
    // Work on a value copy so the caller's calibration is left untouched.
    k4a::calibration calibration_color_downscaled = calibration;

    auto& color_camera = calibration_color_downscaled.color_camera_calibration;
    color_camera.resolution_width /= 2;
    color_camera.resolution_height /= 2;

    // Principal point and focal lengths are expressed in pixels, so they scale with the image.
    auto& intrinsics = color_camera.intrinsics.parameters.param;
    intrinsics.cx /= 2;
    intrinsics.cy /= 2;
    intrinsics.fx /= 2;
    intrinsics.fy /= 2;

    return k4a::transformation(calibration_color_downscaled);
}

/// Wraps the pixel buffer of `ir_image` in a K4A_IMAGE_FORMAT_CUSTOM16 image with
/// the same width, height and row stride.
///
/// No pixels are copied and no release callback is registered, so the returned
/// image only borrows the buffer: the caller must keep the image that owns the
/// buffer alive for as long as the returned image is in use.
/// See https://github.com/microsoft/Azure-Kinect-Sensor-SDK/issues/1188
///
/// @param ir_image  Source image holding 16-bit pixels.
/// @return A CUSTOM16 image referring to the same memory as `ir_image`.
k4a::image create_custom_ir_image(k4a::image ir_image){
    const int width = ir_image.get_width_pixels();
    const int height = ir_image.get_height_pixels();
    // Use the source stride for BOTH the row pitch and the buffer size so the two
    // describe the same memory layout even if rows carry padding.
    const int stride = ir_image.get_stride_bytes();
    // Widen before multiplying so the byte count is computed in size_t, not int.
    const std::size_t buffer_size = static_cast<std::size_t>(stride) * static_cast<std::size_t>(height);

    return k4a::image::create_from_buffer(K4A_IMAGE_FORMAT_CUSTOM16,
                                          width,
                                          height,
                                          stride,
                                          ir_image.get_buffer(),
                                          buffer_size,
                                          nullptr,   // no release callback: the buffer is borrowed
                                          nullptr);  // no callback context
}

int main(){
    const char* file_path = "./huge.mkv";
    const std::string output_dir = "./downscaledFrames";

    // Check if the output directory exists, if not, create it
    if(!directory_exists(output_dir)){
        create_directory(output_dir);
    }
    k4a::playback playback = k4a::playback::open(file_path);
    if (!playback){
        std::cout << "Failed to open file: " << file_path << std::endl;
        return 1;
    }
    else{
        std::cout << "=====Successfully opened file: " << file_path << std::endl;
    }

    k4a::capture capture;
    int frame_count = 0;
    int batch_size = 1;
    k4a::calibration calibration = playback.get_calibration();
    k4a::transformation transformation_original = k4a::transformation(calibration);
    k4a::transformation transformation_color_downscaled = downscale_transformation_2(calibration);

    while (playback.get_next_capture(&capture)){
        if (frame_count == 4){
            return 0;
        }
        if ((capture.get_depth_image()) && (capture.get_color_image()) && (capture.get_ir_image())){
            k4a::image depth_image = capture.get_depth_image();
            k4a::image color_image = capture.get_color_image();
            k4a::image ir_image = capture.get_ir_image();

            // ir processing
            k4a::image ir_image_custom = create_custom_ir_image(ir_image);
            std::pair<k4a::image, k4a::image> images = transformation_original.depth_image_to_color_camera_custom(depth_image, ir_image_custom, K4A_TRANSFORMATION_INTERPOLATION_TYPE_NEAREST, 0);
            cv::Mat ir_image_original = cv::Mat(ir_image.get_height_pixels(), ir_image.get_width_pixels(), CV_16U, (void*)ir_image.get_buffer());

            // store the original ir image
            std::string ir_image_original_filename = output_dir + "/ir_image_original_" + std::to_string(frame_count) + ".png";
            saveImage(ir_image_original_filename, ir_image_original);

            // store the transformed ir image
            cv::Mat ir_image_transformed = cv::Mat(images.second.get_height_pixels(), images.second.get_width_pixels(), CV_16U, (void*)images.second.get_buffer());
            std::string ir_image_transformed_filename = output_dir + "/ir_image_transformed_" + std::to_string(frame_count) + ".png";
            saveImage(ir_image_transformed_filename, ir_image_transformed);

            // store the custom ir image
            cv::Mat ir_image_custom_mat = cv::Mat(ir_image_custom.get_height_pixels(), ir_image_custom.get_width_pixels(), CV_16U, (void*)ir_image_custom.get_buffer());
            std::string ir_image_custom_filename = output_dir + "/ir_image_custom_" + std::to_string(frame_count) + ".png";
            saveImage(ir_image_custom_filename, ir_image_custom_mat);

            frame_count++;

        }
    }

    auto ir_disk_end = std::chrono::high_resolution_clock::now();

    std::cout << "=====Total frames: " << frame_count + 1 << std::endl;

    return 0;
}

Desktop:

rajkundu commented 3 months ago

I'm on the same team as @KaboomHBR. I think we determined that this is totally normal, expected behavior caused by the invalidity of the depth data when IR = 65535. Thus, after transformation, the 65535 pixels are replaced with invalid_custom_value (usually 0).