Closed sctrueew closed 1 year ago
...
// sort results
float* pdata = outputTensor.GetTensorMutableData<float>();
std::vector<std::pair<size_t, float>> indexValuePairs;
for (size_t i = 0; i < numClasses; ++i) {
indexValuePairs.emplace_back(i, pdata[i]);
}
std::sort(indexValuePairs.begin(), indexValuePairs.end(), [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; });
// show Top5
for (size_t i = 0; i < 5; ++i) {
const auto& result = indexValuePairs[i];
std::cout << i + 1 << ": " << result.first << " " << result.second << std::endl;
}
The final result also needs to be passed through softmax().
I did that, but I didn't get the correct result:
`std::vector<float> res;
// Softmax-style normalization over probVec, followed by printing up to five entries.
float sum = 0.0f;
float t;
for (int i = 0; i < probVec.size(); i++) {
auto sec = probVec[i];
// NOTE(review): only values greater than 0 are exponentiated and stored; all
// other entries are skipped, so res can be shorter than probVec and res[i] no
// longer lines up with probVec[i].
if (sec > 0) {
t = expf(sec);
res.push_back(t);
sum += t;
}
}
// Divide every stored exponential by their total.
for (int i = 0; i < res.size(); i++) {
res[i] /= sum;
}
// Print at most five entries. res is not sorted in this snippet, so these are the
// first stored entries, and the printed ID is the position inside res.
const int topk = std::min(5, (int)res.size());
for (size_t i = 0; i < topk; ++i) {
const auto& conf = res[i];
std::cout << " ID " << i << " - " << " confidence "
<< conf << std::endl;
}`
Remove it. Also, topk should be the top maximum values, so maybe you should sort res.
I've removed that. Here is my final code:
` void loadObj()
{
    // Creates the ONNX Runtime environment and the inference session for the
    // model at config.modelPath, storing them in `env` and `session`.
    // CUDA is requested only when config.isGPU is set.
    std::cout << "OX model loading..." << std::endl;

    const std::string instanceName{ "Image classifier inference" };
    env = std::make_shared<Ort::Env>(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
                                     instanceName.c_str());

    /**************** Create ORT session ******************/
    Ort::SessionOptions sessionOptions;
    if (config.isGPU) {
        // Enable CUDA
        sessionOptions.AppendExecutionProvider_CUDA(OrtCUDAProviderOptions{});
    }
    // Enable all available graph optimizations.
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

    // ONNX Runtime takes a wide-character model path on Windows and a narrow
    // one elsewhere, so the path is only widened for Windows builds.
#ifdef _WIN32
    // NOTE(review): element-wise widening is only correct for ASCII paths.
    const std::wstring widePath(config.modelPath.begin(), config.modelPath.end());
    session = std::make_shared<Ort::Session>(*env, widePath.c_str(), sessionOptions);
#else
    session = std::make_shared<Ort::Session>(*env, config.modelPath.c_str(), sessionOptions);
#endif

    std::cout << "OX model loaded" << std::endl;
}
// Converts a BGR frame into a flat, channel-first (C, H, W) float vector with
// values scaled by 1/255.
//
// frame: source image; its pixel data is left untouched.
// sizeX, sizeY: target width and height passed to cv::resize.
// Returns 3 * sizeX * sizeY floats, or an empty vector when `frame` is empty.
std::vector<float> loadImage(const cv::Mat frame, int sizeX = 224, int sizeY = 224)
{
    if (frame.empty()) {
        std::cout << "No image found.";
        // Nothing to convert; continuing would make cvtColor fail on empty input.
        return {};
    }

    // convert from BGR to RGB
    // A separate destination is used because cv::Mat copies share pixel data:
    // converting in place would overwrite the caller's image.
    cv::Mat rgb;
    cv::cvtColor(frame, rgb, cv::COLOR_BGR2RGB);

    // resize
    cv::Mat resized;
    cv::resize(rgb, resized, cv::Size(sizeX, sizeY));

    // reshape to 1D (single channel, single row) and convert to float in [0, 1]
    const cv::Mat flat = resized.reshape(1, 1);
    std::vector<float> interleaved;
    flat.convertTo(interleaved, CV_32FC1, 1. / 255);

    // Transpose (Height, Width, Channel)(224,224,3) to (Channel, Height, Width)(3,224,224):
    // take every third value starting at the channel offset.
    const size_t channelCount = 3;
    std::vector<float> output;
    output.reserve(interleaved.size());
    for (size_t ch = 0; ch < channelCount; ++ch) {
        for (size_t i = ch; i < interleaved.size(); i += channelCount) {
            output.emplace_back(interleaved[i]);
        }
    }
    return output;
}
// Computes softmax probabilities over the scores of the first `n` entries of `prob`.
//
// prob: (index, score) pairs; only the score (`.second`) is read.
// n:    number of leading entries to normalize; clamped to [0, prob.size()].
// Returns one probability per processed entry, in the same order as `prob`,
// or an empty vector when there is nothing to process.
std::vector<float> ox_softmax(const std::vector<std::pair<size_t, float>>& prob, int n) {
    std::vector<float> res;
    // Never read past the end of `prob`, and treat a non-positive n as "nothing".
    const size_t count = n > 0 ? std::min(static_cast<size_t>(n), prob.size()) : 0;
    if (count == 0) {
        return res;
    }
    res.reserve(count);

    // Subtracting the largest score before exponentiating keeps exp() from
    // overflowing to infinity (which would turn the result into NaN); the
    // normalized probabilities are mathematically unchanged.
    float maxScore = prob[0].second;
    for (size_t i = 1; i < count; ++i) {
        maxScore = std::max(maxScore, prob[i].second);
    }

    float sum = 0.0f;
    for (size_t i = 0; i < count; ++i) {
        const float e = std::exp(prob[i].second - maxScore);
        res.push_back(e);
        sum += e;
    }
    // sum >= 1 here because the largest score contributes exp(0) == 1.
    for (float& value : res) {
        value /= sum;
    }
    return res;
}
// Runs one classification pass on `frame` and returns up to config.topK
// results (class_id plus softmax score), ordered by descending score.
//
// An empty vector is returned when the model shapes are unusable, when the
// preprocessed image does not match the model input size, or when inference
// throws an Ort::Exception (the message is printed to std::cout).
std::vector<MlResult> Detect(cv::Mat frame)
{
    std::vector<MlResult> result;

    Ort::TypeInfo inputTypeInfo = session->GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();
#ifdef VERBOSE
    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;
#endif
    // Get the shape of the input
    mInputDims = inputTensorInfo.GetShape();
#ifdef VERBOSE
    std::cout << "Input Dimensions: " << mInputDims << std::endl;
#endif
    Ort::TypeInfo outputTypeInfo = session->GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();
    // Get the shape of the output
    mOutputDims = outputTensorInfo.GetShape();
#ifdef VERBOSE
    std::cout << "Output Dimensions: " << mOutputDims << std::endl << std::endl;
#endif

    // The code below indexes the shapes as (N, C, H, W) and (N, classes).
    if (mInputDims.size() != 4 || mOutputDims.size() < 2) {
        std::cout << "Unexpected model input/output rank." << std::endl;
        return result;
    }
    const int64_t numChannels = mInputDims[1]; //3;
    const int64_t height = mInputDims[2];      // 224; rows come before columns in NCHW
    const int64_t width = mInputDims[3];       // 224;
    const int64_t numClasses = mOutputDims[1];
    // Non-positive dimensions cannot be used to size the buffers below.
    // NOTE(review): presumably dynamic axes show up as -1 here; confirm.
    if (numChannels <= 0 || height <= 0 || width <= 0 || numClasses <= 0) {
        std::cout << "Model has non-positive dimensions." << std::endl;
        return result;
    }
    const size_t numInputElements = static_cast<size_t>(numChannels * height * width);

    // load image; the returned vector doubles as the input tensor's buffer
    std::vector<float> input = loadImage(frame, static_cast<int>(width), static_cast<int>(height));
    if (input.size() != numInputElements) {
        std::cout << "Preprocessed image size does not match the model input." << std::endl;
        return result;
    }
    // Value-initialized so the scores are never read uninitialized.
    std::vector<float> results(static_cast<size_t>(numClasses));

    // define shape
    const std::array<int64_t, 4> inputShape = { 1, numChannels, height, width };
    const std::array<int64_t, 2> outputShape = { 1, numClasses };

    // define Tensor (both tensors wrap the vectors above without copying)
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    auto inputTensor = Ort::Value::CreateTensor<float>(memory_info, input.data(), input.size(), inputShape.data(), inputShape.size());
    auto outputTensor = Ort::Value::CreateTensor<float>(memory_info, results.data(), results.size(), outputShape.data(), outputShape.size());

    // define names
    // The allocator-owned strings are copied before being freed, so the
    // pointers handed to Run() stay valid for the whole call.
    Ort::AllocatorWithDefaultOptions ort_alloc;
    auto rawInputName = session->GetInputName(0, ort_alloc);
    const std::string inputName = rawInputName;
    ort_alloc.Free(rawInputName);
    auto rawOutputName = session->GetOutputName(0, ort_alloc);
    const std::string outputName = rawOutputName;
    ort_alloc.Free(rawOutputName);
    const std::array<const char*, 1> inputNames = { inputName.c_str() };
    const std::array<const char*, 1> outputNames = { outputName.c_str() };

    // run inference
    try {
        session->Run(runOptions, inputNames.data(), &inputTensor, 1, outputNames.data(), &outputTensor, 1);
    }
    catch (const Ort::Exception& e) {
        std::cout << e.what() << std::endl;
        // No valid scores were produced, so report no detections.
        return result;
    }

    // sort results: pair each class index with its score, highest score first
    std::vector<std::pair<size_t, float>> indexValuePairs;
    indexValuePairs.reserve(results.size());
    for (size_t i = 0; i < results.size(); ++i) {
        indexValuePairs.emplace_back(i, results[i]);
    }
    std::sort(indexValuePairs.begin(), indexValuePairs.end(),
              [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; });

    // Softmax over all (sorted) scores; res[i] belongs to indexValuePairs[i].
    const auto res = ox_softmax(indexValuePairs, static_cast<int>(numClasses));

    // Clamp to [0, res.size()] so a negative topK cannot become a huge loop bound.
    const int topk = std::max(0, std::min(config.topK, static_cast<int>(res.size())));
    result.reserve(static_cast<size_t>(topk));
    for (int i = 0; i < topk; ++i) {
        MlResult mr;
        mr.score = res[i];
        mr.class_id = indexValuePairs[i].first;
        result.push_back(mr);
    }
    return result;
}
void dispose() {
    // Destroy the engine.
    // Resetting the shared_ptr drops this object's ownership of the Ort::Session
    // so its destructor can free the native session. Calling release() on the
    // Ort::Session would only detach the native handle without destroying it.
    // reset() is also safe when no session has been created.
    session.reset();
}`
Maybe you should change your img preprocessing. https://github.com/UNeedCryDear/yolov5-seg-opencv-onnxruntime-cpp/blob/d9ce33c26631ff6cdfbb905420bec998388e472d/yolov5_seg_utils.cpp#L14
I have tested it but it didn't work
@UNeedCryDear Hi, do you have any plans to implement the cls models?
Sorry, I have no plans to. But I know why your results are not consistent with the Python results.
If you debug it, you can see that the preprocessing here is different from the one used for object detection.
Both CenterCrop and Normalize are used here. So you should add CenterCrop and normalization to loadImage(), or try the following code:
Mat blob;
cv::dnn::blobFromImage(frame, blob, 1 / 255.0, Size(224, 224), cv::Scalar(104, 117, 123), true, true);
float* pp = (float*)blob.data;
std::copy(pp, pp + numInputElements, input);
Thanks for the reply, I added
cv::Mat blob;
cv::dnn::blobFromImage(frame, blob, 1 / 255.0, cv::Size(224, 224), cv::Scalar(104, 117, 123), true, true);
float* pp = (float*)blob.data;
std::copy(pp, pp + numInputElements, input);
instead of
const std::vector<float> imageVec = loadImage(frame, width, height);
I couldn't get the right result
If this doesn't work, you may have to implement these functions yourself.
@UNeedCryDear Hi, The problem has been solved by rewriting the preprocessing and loading image functions. Thanks
Congratulations !
Hi, thanks for your work. I have trained a model with yolo-cls and exported it to ONNX. I'm using this for inferencing but the result is not correct. Could you please guide me?
Thanks in advance