Open adnan134j2 opened 2 years ago
@hariharans29 Please help me solve this issue.
` // https://github.com/microsoft/onnxruntime/blob/v1.8.2/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/CXX_Api_Sample.cpp // https://github.com/microsoft/onnxruntime/blob/v1.8.2/include/onnxruntime/core/session/onnxruntime_cxx_api.h
template
template
/// Streams a human-readable C++ type name for an ONNX tensor element type.
///
/// @param os   Stream that receives the name.
/// @param type Tensor element type to describe.
/// @return `os`, so insertions can be chained. Enumerators that are not
///         listed in the switch write nothing.
std::ostream& operator<<(std::ostream& os, const ONNXTensorElementDataType& type) {
    switch (type) {
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED:
            os << "undefined";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
            os << "float";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
            os << "uint8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
            os << "int8_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
            os << "uint16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
            os << "int16_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
            os << "int32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
            os << "int64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
            os << "std::string";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
            os << "bool";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
            os << "float16";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
            os << "double";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
            os << "uint32_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
            os << "uint64_t";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64:
            os << "float real + float imaginary";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128:
            // Both components of COMPLEX128 are double precision; the text
            // previously claimed a float imaginary part.
            os << "double real + double imaginary";
            break;
        case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
            os << "bfloat16";
            break;
        default:
            break;
    }
    return os;
}
std::vector
int main(int argc, char* argv[]) { bool useCUDA{ false }; if (useCUDA) { std::cout << "Inference Execution Provider: CUDA" << std::endl; } else { std::cout << "Inference Execution Provider: CPU" << std::endl; } std::string instanceName{ "image-classification-inference" }; std::string imageFilepath{ "D:\OOOOOONNNNNXXXXX\cpp-onnxruntime-resnet-console-app-main\OnnxRuntimeResNet\assets\dog.png" }; Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, instanceName.c_str()); Ort::SessionOptions sessionOptions; sessionOptions.SetIntraOpNumThreads(1); if (useCUDA) { OrtCUDAProviderOptions cuda_options{ 0 }; sessionOptions.AppendExecutionProvider_CUDA(cuda_options); } sessionOptions.SetGraphOptimizationLevel( GraphOptimizationLevel::ORT_ENABLE_EXTENDED); Ort::Session session(env, L"D:\OOOOOONNNNNXXXXX\cpp-onnxruntime-resnet-console-app-main\OnnxRuntimeResNet\assets\model.onnx", sessionOptions);
Ort::AllocatorWithDefaultOptions allocator;
size_t numInputNodes = session.GetInputCount();
size_t numOutputNodes = session.GetOutputCount();
std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;
Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
std::cout << "Input Type: " << inputType << std::endl;
Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
std::cout << "Output Type: " << outputType << std::endl;
///Things used later on
const char* inputName = session.GetInputName(0, allocator);
std::cout << "Input Name: " << inputName << std::endl;
std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
std::cout << "Input Dimensions: " << inputDims << std::endl;
const char* outputName = session.GetOutputName(0, allocator);
std::cout << "Output Name: " << outputName << std::endl;
std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
std::cout << "Output Dimensions: " << outputDims << std::endl;
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
std::vector<Ort::Value> inputTensors;
static constexpr const int width = 500;
static constexpr const int height = 500;
static constexpr const int channel = 3;
std::array<int64_t, 4> input_shape_{ 1,height, width,channel };
std::array<int64_t, 2> outputt_shape_{ 1, 8 };
std::string img_path = "D:\\cpp-onnxruntime-resnet-console-app-main\\OnnxRuntimeResNet\\assets\\dog.jpg";
cv::Mat imgSource = cv::imread(img_path);
std::array<uint8_t, width* height* channel> input_image_{};
uint8_t* input = input_image_.data();
for (int i = 0; i < imgSource.rows; i++) {
for (int j = 0; j < imgSource.cols; j++) {
for (int c = 0; c < 3; c++)
{
//NHWC
if (c == 0)
input[i * imgSource.cols * 3 + j * 3 + c] = imgSource.ptr<uint8_t>(i)[j * 3 + 2];
if (c == 1)
input[i * imgSource.cols * 3 + j * 3 + c] = imgSource.ptr<uint8_t>(i)[j * 3 + 1];
if (c == 2)
input[i * imgSource.cols * 3 + j * 3 + c] = imgSource.ptr<uint8_t>(i)[j * 3 + 0];
}
}
}
inputTensors.push_back(Ort::Value::CreateTensor<uint8_t>(
memoryInfo, input, input_image_.size(), input_shape_.data(), input_shape_.size()));
// size_t outputTensorSize = vectorProduct(outputtshape);
std::vector<const char*> inputNames{ inputName };
std::vector<const char*> outputNames{ "detection_anchor_indices", "detection_boxes", "detection_classes", "detection_multiclass_scores", "detection_scores", "num_detections", "raw_detection_boxes", "raw_detection_scores" };
std::vector<float> outputTensorValues(outputt_shape_.size());
// inputTensors.push_back(Ort::Value::CreateTensor<uint8_t>(
// memoryInfo, input, input_image_.size(), input_shape_.data(), input_shape_.size()));
/////////////////////////////////////////////////////////////////////////////////
std::vector<Ort::Value> outputTensors; //
/////////////////////////////////////////////////////////////////////////////////
//std::cout << "memoryInfo: " << memoryInfo << std::endl;
//std::cout << "outputTensorValues.data(): " << outputTensorValues.data() << std::endl;
//std::cout << "outputDims.data(): " << outputDims.data() << std::endl;
//std::cout << "outputDims.size(): " << outputDims.size() << std::endl;
std::cout << "memoryInfo: " << memoryInfo << std::endl;
outputTensors.push_back(Ort::Value::CreateTensor<float>(
memoryInfo, outputTensorValues.data(), 8,
outputt_shape_.data(), outputt_shape_.size()));
//std::cout << "memoryInfo: " << memoryInfo << std::endl;
//std::cout << "inputDims.data(): " << inputDims.data() << std::endl;
//std::cout << "inputDims.size(): " << inputDims.size() << std::endl;
//std::cout << "memoryInfo: " << memoryInfo << std::endl;
///std::cout << "outputTensorValues.data(): " << outputTensorValues.data() << std::endl;
////std::cout << "outputTensorSize: " << outputTensorSize << std::endl;
//std::cout << "inputNames.data(): " << inputNames.data() << std::endl;
//std::cout << "inputTensors.data() " << inputTensors.data() << std::endl;
//std::cout << "outputNames.data() " << outputNames.data() << std::endl;
//std::cout << "outputTensors.data() " << outputTensors.data() << std::endl;
std::cout << "Done with test 1" << std::endl;
//session.Run(Ort::RunOptions{ nullptr }, inputNames.data(),
// inputTensors.data(), 1, outputNames.data(), outputTensors.data(),
//outputNames.size());
try {
session.Run(Ort::RunOptions{ nullptr }, inputNames.data(),
inputTensors.data(), 1, outputNames.data(),
outputNames.size());
}
catch (const std::runtime_error& re) {
std::cerr << "Runtime error: " << re.what() << std::endl;
}
}`
Start by creating a valid Ort::RunOptions object. See
@pranavsharma please find the model.onnx attached to this link. https://drive.google.com/file/d/19iHmuAqUjznP_jbgK-o3lq_hOFJGNtX2/view
Using the code I shared above, I can now run the model on the GPU only if I decrease the input image size. Otherwise it fails with the following error: "Unhandled exception at 0x00007FFF0A9D3F68 (cufft64_10.dll) in OnnxRuntimeResNet.exe: 0xC00000FD: Stack overflow (parameters: 0x0000000000000001, 0x0000006160A03000)." Please help me look into this issue; I would be very thankful.
@pranavsharma Using the Python API, I can successfully run the model on both CPU and GPU without any issues. In C++, I can run the model on the CPU without any issues. I am now using version 1.10.0 and still get the same error.
So for the exact same input Python API works on GPU but C++ API does not? Can you attach the exact input that fails and the exact repro code in both cases?
@pranavsharma Minimal Python code:
`#2022-04-14 01:14:19,164 - INFO - Model inputs: ['input_tensor']
# Minimal repro: run the exported detection model on a single image using the
# CUDA execution provider.
import time

import numpy as np
import onnxruntime as rt
from PIL import Image

PATH_TO_SAVED_MODEL = 'exported-models/my_modelFasterR/model.onnx'
sess = rt.InferenceSession(
    PATH_TO_SAVED_MODEL,
    providers=['CUDAExecutionProvider'],
)
image_path = "TestImg//138D79103EU_20210918114359734_1_5_bottomfront_cut_res_Dr1.jpg"

# By convention the image goes into a numpy array of shape
# (height, width, channels), with channels=3 for RGB.
img = Image.open(image_path)
img_width, img_height = img.size  # PIL reports size as (width, height)
pixels = np.array(img.getdata())
img_data = pixels.reshape(img_height, img_width, 3)
# Cast to uint8 and prepend a leading axis of length 1.
img_data = np.expand_dims(img_data.astype(np.uint8), axis=0)

outputs = [
    'detection_anchor_indices',
    'detection_boxes',
    'detection_classes',
    'detection_multiclass_scores',
    'detection_scores',
    'num_detections',
    'raw_detection_boxes',
    'raw_detection_scores',
]
start = time.time()
detections = sess.run(outputs, {'input_tensor': img_data})
# Convert to numpy arrays, and take index [0] to remove the batch dimension.
# We're only interested in the first num_detections.
`
@pranavsharma Minimal C++ code:
`#include
///#include
// Minimal C++ repro: creates an ONNX Runtime environment and session for the
// model (appending the CUDA execution provider when useCUDA is true) and
// declares the shape of the input tensor.
// NOTE(review): the body is cut off at the trailing `std::vector` - the input
// buffer, tensor creation and Run call appear to be missing from this paste.
int main()
{
bool useCUDA{ true };
Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, "Detection");
Ort::SessionOptions session_options;
if (useCUDA) {
// Second argument is presumably the CUDA device id - TODO confirm.
OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0);
}
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
Ort::AllocatorWithDefaultOptions allocator;
// NOTE(review): `\m` is not a valid escape sequence; the path presumably
// needs to be written as L"D:\\model.onnx" - confirm against the real source.
Ort::Session session(env, L"D:\model.onnx", session_options);
// Fixed input image dimensions used to build the tensor shape below.
static constexpr const int width = 500;
static constexpr const int height = 500;
static constexpr const int channel = 3;
// Shape is ordered {1, height, width, channel}.
std::array<int64_t, 4> inputshape{ 1,height, width,channel };
std::vector
}`
@pranavsharma Please look at the minimal Python and C++ code I shared. The model works fine with the Python code on both GPU and CPU, using any onnxruntime version. The model also works fine on the CPU with my C++ code. However, when I run it on the GPU, the model gives the above-mentioned error. When I use an input image size of 500x500x3, I get this error, but when I reduce the size of the image, the model also works on the GPU. I would like to use the model with whatever image size I want. Please help me solve this issue. Thanks
@pranavsharma
After resizing the image to a smaller size, I also have to change the image size in the following lines of my C++ code: 'static constexpr const int width = 500; static constexpr const int height = 500; static constexpr const int channel = 3;'
Start by creating a valid Ort::RunOptions object. See
I am sorry, I don't quite understand what you mean by this. Please help me solve this issue.
I've a Tesla V100 16GB GPU and tested this on my Ubuntu machine and couldn't repro this. I doubled the input size from 500x500x3 to 1000x1000x3. I used random values for my inputs. See sample code https://gist.github.com/pranavsharma/b3e1faef9fff883beaa8baabd3bb864c.
Any update on this issue? @pranavsharma @adnan134j2
Is your NVIDIA driver up to date? I had a similar issue on my dev box until I found that the latest does not work for me. There is an archive of drivers. I had to downgrade at that point.
Describe the bug I am using onnxruntime-gpu to run an object detection model in C++. I installed onnxruntime GPU version 1.6.0 and am using it in Visual Studio 2019. But no matter what version I use, I get this error: "Unhandled exception at 0x00007FFABE6A9538 (cudnn_cnn_infer64_8.dll) in Onnx.exe". The model is loaded successfully with onnxruntime-gpu, but it gives this error while performing inference. Please help me figure this out. The model can be loaded and run successfully with onnxruntime CPU.
System information
To Reproduce
Expected behavior A clear and concise description of what you expected to happen.
Screenshots If applicable, add screenshots to help explain your problem.
Additional context Add any other context about the problem here. If the issue is about a particular model, please share the model details as well to facilitate debugging.