ONNXRuntime-Cpp and ONNXRuntime python give different results: #13338

Open devendraswamy opened 1 hour ago

devendraswamy commented 1 hour ago

I am facing a problem with a YOLOv5 model. When I run inference with my Python ONNX Runtime code, all the bounding box (bbox) values are correct. However, when I run the same model on the same input through my C++ code, I get incorrect bbox values.

The image is preprocessed in the Python code; the last step is: image_data = np.expand_dims(image_data, axis=0) # Add batch dimension

That image is then fed to the Python .pyd module (the C++ inference file compiled to a .pyd), which runs:

auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_names, &input_tensor, 1, output_names, 1);


The compiled (built) C++ code is:


#include <array>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

using namespace std;
namespace py = pybind11;

class OnnxModel { public: OnnxModel(const std::string& model_path) : env(ORT_LOGGING_LEVEL_WARNING, "OnnxModel"), session(env, std::wstring(model_path.begin(), model_path.end()).c_str(), Ort::SessionOptions()) { Ort::AllocatorWithDefaultOptions allocator;

    // Get input and output names as Ort::AllocatedStringPtr
    Ort::AllocatedStringPtr input_name_alloc = session.GetInputNameAllocated(0, allocator);
    Ort::AllocatedStringPtr output_name_alloc = session.GetOutputNameAllocated(0, allocator);

    // Convert the Ort::AllocatedStringPtr to std::string using the get() method
    input_name = std::string(input_name_alloc.get());
    output_name = std::string(output_name_alloc.get());

    // Optional: Print the input and output names for debugging
    std::cout << "Input name: " << input_name << std::endl;
    std::cout << "Output name: " << output_name << std::endl;

// Accept a 4D numpy array: (batch_size, channels, height, width)
py::array_t<float> run(py::array_t<float> input_array) {
    // Request a buffer from the numpy array
    py::buffer_info buf = input_array.request();

    // Check that the input is indeed a 4-dimensional array
    if (buf.ndim != 4) {
        throw std::runtime_error("Input should be a 4-dimensional array (batch_size, channels, height, width)");

    // Convert numpy array data to std::vector<float>
    std::vector<float> input_data(static_cast<float*>(buf.ptr), 
                                  static_cast<float*>(buf.ptr) + buf.size);

    // Run the inference
    return run_inf(input_data, {1, 3, 640, 640});  // Adjust shape based on your model's input

py::array_t<float> run_inf(const std::vector<float>& input_data, const std::array<int64_t, 4>& input_shape) {
    // Create input tensor
    Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(

    // Prepare input and output names
    const char* input_names[] = { input_name.c_str() };
    const char* output_names[] = { output_name.c_str() };

    // Run the model
    auto output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_names, &input_tensor, 1, output_names, 1);

    // Get the output data
    float* output_data = output_tensors[0].GetTensorMutableData<float>();
    size_t output_count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();

    // Create a numpy array from the output data
    return py::array_t<float>(output_count, output_data);

private: Ort::Env env; Ort::Session session; std::string input_name; std::string output_name; };

// Python module `onnxloader`: exposes OnnxModel with its constructor
// (model path) and the `run` method.
PYBIND11_MODULE(onnxloader, m) {
    // pybind11's binding type is the class template `py::class_<T>`;
    // `py::class(...)` is not valid C++ because `class` is a keyword.
    py::class_<OnnxModel>(m, "OnnxModel")
        .def(py::init<const std::string&>())
        .def("run", &OnnxModel::run);
}

Image feeding from python code:

Function to preprocess the image

def preprocess_image(image_path, input_size=(640, 640)):
    """Load an image and convert it to a float32 NCHW tensor for the model.

    Args:
        image_path: Path of the image file to read.
        input_size: Size passed to ``cv2.resize``; defaults to (640, 640).

    Returns:
        A tuple ``(image_data, image)``: ``image_data`` is the C-contiguous
        float32 array of shape (1, C, H, W) scaled to [0, 1]; ``image`` is the
        resized RGB float32 image in HWC layout.

    Raises:
        ValueError: If the image cannot be opened.
    """
    # Load the image using OpenCV
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)  # Load image in color mode
    if image is None:
        raise ValueError(f"Could not open or find the image: {image_path}")
    # Convert from BGR to RGB format
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Resize the image to match the input size expected by the model
    image = cv2.resize(image, input_size)
    # Normalize the image to [0, 1] range
    image = image.astype(np.float32) / 255.0  # Convert to float and normalize
    # Rearrange the image to CHW format and add the batch dimension (1, C, H, W)
    image_data = np.transpose(image, (2, 0, 1))  # Convert to CHW format
    image_data = np.expand_dims(image_data, axis=0)  # Add batch dimension
    # np.transpose only returns a strided view: the bytes in memory are still in
    # HWC order. Native code that reads the raw buffer linearly would therefore
    # see the wrong layout, so materialize a C-contiguous copy before returning.
    image_data = np.ascontiguousarray(image_data, dtype=np.float32)
    print(f"Image preprocessed: type = {type(image_data)}, shape = {image_data.shape}")
    return image_data, image  # Return the preprocessed image data
