Need help. OpenVino can't detect image but pytorch still can.

Mininggamer commented 2 months ago

I have 2 model, the first one detect metal i used pretrain yolov8n, this model still can detect easily. Down here is image detected by openvino sharp.
But the second model used pretrain yolov8s for detect bip zone can't while image type are the same. This model still can detected by pytorch on python and Here is detected image in python.
Maybe my code seem have problem can u help me with it.

My OpenVino Class https://drive.google.com/file/d/1JkGhYVcMYk9i-jp7egxq2R9PXSWkvbrc/view?usp=sharing First And Second Model https://drive.google.com/file/d/1jDzB7avVJihtKK2lViNW0sI3SvKSqxdA/view?usp=sharing https://drive.google.com/file/d/1mnrbZc1MS41MN3fz74YxjjqYexs9Mk89/view?usp=sharing

guojin-yan commented 2 months ago

May I ask what is the output result of your program? Is there no result or is the result wrong. If the result is incorrect, you can check if there is a problem with the result processing.

Mininggamer commented 2 months ago

The second one return result but all result max score equal 0. Output size of second model is [1,6,33600]

My process result func :

//categ_nums = 1; m_output_length = [1,6,33600] ;
// m_output_Result_Lenght  = m_output_length[0] * m_output_length[1] *m_output_length[2]
 Marshal.UnsafeAddrOfPinnedArrayElement(result, (4 + categ_nums) * m_output_Result_Lenght * b * 4);
 // Create OpenCV matrix for detection results of the current batch
 //4 mean: centerx,centery, width, height
 // 4 + categ_nums: 4 params of box and score of each classes.
 // 8400: total number of detection model ouputs per batch.
 // 
Mat result_data = new Mat(4 + categ_nums, m_output_length, MatType.CV_32F,
 Marshal.UnsafeAddrOfPinnedArrayElement(result, (4 + categ_nums) * m_output_length * b * 4), 4 * m_output_length);
 result_data = result_data.T(); // Transpose the matrix

 // // Lists to store positions, class IDs, and confidence scores
 List<Rect> position_boxes = new List<Rect>();
 List<int> class_ids = new List<int>();
 List<float> confidences = new List<float>();
 // Loop through each detection in the result data
 for (int i = 0; i < result_data.Rows; i++)
 {
     // Extract class scores for the current detection
     Mat classes_scores = new Mat(result_data, new Rect(4, i, categ_nums, 1));
     OpenCvSharp.Point max_classId_point, min_classId_point;
     double max_score, min_score;
     // Find the maximum score and its index
     Cv2.MinMaxLoc(classes_scores, out min_score, out max_score,
         out min_classId_point, out max_classId_point);
     // Confidence level between 0 ~ 1
     // Obtain identification box information
     //If the maximum score is above the threshold, process the detection
     if (max_score > 0.25)
     {
         Console.WriteLine(max_score);
         float cx = result_data.At<float>(i, 0);
         float cy = result_data.At<float>(i, 1);
         float ow = result_data.At<float>(i, 2);
         float oh = result_data.At<float>(i, 3);
         int x = (int)((cx - 0.5 * ow) * this.factors[b]);
         int y = (int)((cy - 0.5 * oh) * this.factors[b]);
         int width = (int)(ow * this.factors[b]);
         int height = (int)(oh * this.factors[b]);
         Rect box = new Rect();
         box.X = x;
         box.Y = y;
         box.Width = width;
         box.Height = height;
         position_boxes.Add(box);
         class_ids.Add(max_classId_point.X);
         confidences.Add((float)max_score);
     }
 }

With this func max_score always return 0;

Mininggamer commented 2 months ago

Here is my full code

public class OpenVinoModel
{
    Core core;
    Model model;
    CompiledModel compiled_model;
    InferRequest infer_request;
    private float[] factors;

    public static long[] input_size = { 1, 3, 640, 640 };
    public static int categ_nums = 2;
    private int m_output_length = 6048;
    private int m_output_Result_Lenght;
    public float det_thresh = 0.5f;
    public float det_nms_thresh = 0.5f;
    public float score = 0.5f;

    OpenVinoMetaDataParams MetaDataParams;

    public OpenVinoModel(string PathFileXML, string PathFileBin, string PathFileMetaData, int output_length = 6048,
        float score = 0.5f, int numsClass = 2, int Width = 640, int Height = 640, bool isMono = false)
    {
        this.m_output_length = output_length;
        this.score = score;
        categ_nums = numsClass;
        if (isMono)
        {
            input_size = new long[] { 1, 1, Width, Height };
        }
        else
        {
            input_size = new long[] { 1, 3, Width, Height };
        }

        if (!File.Exists(PathFileXML)) throw new FileNotFoundException("Not Found PathFileXML");
        if (!File.Exists(PathFileBin)) throw new FileNotFoundException("Not Found PathFileBin");
        //if (!File.Exists(PathFileMetaData)) throw new FileNotFoundException("Not Found PathFileMetaData");

        InitModel(PathFileXML, PathFileBin);
        //  InitParams(PathFileMetaData);
    }

    private void InitModel(string PathFileXML, string PathFileBin)
    {
        core = new Core();
        model = core.read_model(PathFileXML, PathFileBin);

        compiled_model = core.compile_model(model, "AUTO");
        infer_request = compiled_model.create_infer_request();

        Tensor input_tensor = infer_request.get_input_tensor();
        infer_request.infer();

        Tensor output_tensor = infer_request.get_output_tensor();
        input_size = input_tensor.get_shape().ToArray<long>();
        var output_size = output_tensor.get_shape().ToArray<long>();
        m_output_length = (int)output_size[2];
        m_output_Result_Lenght = (int) (output_size[2] * output_size[1] * output_size[0]);
    }

    public DetResult PredictToMat(Mat mat)
    {
        factors = new float[1];
        mat = OpenVinoSharp.Extensions.process.Resize.letterbox_img(mat, (int)input_size[2], out factors[0]);
        mat = letterbox_img(mat, (int)input_size[2], out factors[0]);
        mat = Normalize.run(mat, true);
        float[] input_data = Permute.run(mat);
        float[] output_data = infer(input_data);
      //  m_output_length = output_data.Length;
        return process_result(output_data, 1)[0];
        //4915200
    }

    public static Mat letterbox_img(Mat image, int length, out float scales)
    {
        int num = ((image.Cols > image.Rows) ? image.Cols : image.Rows);
        Mat mat = Mat.Zeros(num, num, MatType.CV_8UC3);
        mat *= 255.0;
        Rect roi = new Rect(0, 0, image.Cols, image.Rows);
        image.CopyTo(new Mat(mat, roi));
        Mat mat2 = new Mat();
        Cv2.Resize(mat, mat2, new OpenCvSharp.Size(length, length));
        scales = (float)num / (float)length;
        return mat2;
    }
    protected float[] infer(float[] input_data, long[] shape = null)
    {
        Tensor input_tensor = infer_request.get_input_tensor();
        var inputsize = input_tensor.get_size();
        if (shape != null)
            input_tensor.set_shape(new Shape(shape));
        input_tensor.set_data<float>(input_data);
        infer_request.infer();

        Tensor output_tensor = infer_request.get_output_tensor();
        string s = output_tensor.get_shape().to_string();
        float[] result = output_tensor.get_data<float>((int)output_tensor.get_size());
        return result;
    }
    public List<DetResult> process_result(float[] result, int batch)
    {
        // List to store results for each batch
        List<DetResult> re_result = new List<DetResult>();
        // Loop through each batch

        for (int b = 0; b < batch; ++b)
        {
            Marshal.UnsafeAddrOfPinnedArrayElement(result, (4 + categ_nums) * m_output_Result_Lenght * b * 4);
            // Create OpenCV matrix for detection results of the current batch
            //4 mean: centerx,centery, width, height
            // 4 + categ_nums: 4 params of box and score of each classes.
            // 8400: total number of detection model ouputs per batch.
            // 
            Mat result_data = new Mat(4 + categ_nums, m_output_length, MatType.CV_32F,
            Marshal.UnsafeAddrOfPinnedArrayElement(result, (4 + categ_nums) * m_output_length * b * 4), 4 * m_output_length);
            result_data = result_data.T(); // Transpose the matrix

            // // Lists to store positions, class IDs, and confidence scores
            List<Rect> position_boxes = new List<Rect>();
            List<int> class_ids = new List<int>();
            List<float> confidences = new List<float>();
            // Loop through each detection in the result data
            for (int i = 0; i < result_data.Rows; i++)
            {
                // Extract class scores for the current detection
                Mat classes_scores = new Mat(result_data, new Rect(4, i, categ_nums, 1));
                OpenCvSharp.Point max_classId_point, min_classId_point;
                double max_score, min_score;
                // Find the maximum score and its index
                Cv2.MinMaxLoc(classes_scores, out min_score, out max_score,
                    out min_classId_point, out max_classId_point);
                // Confidence level between 0 ~ 1
                // Obtain identification box information
                //If the maximum score is above the threshold, process the detection
                if (max_score > 0.25)
                {
                    Console.WriteLine(max_score);
                    float cx = result_data.At<float>(i, 0);
                    float cy = result_data.At<float>(i, 1);
                    float ow = result_data.At<float>(i, 2);
                    float oh = result_data.At<float>(i, 3);
                    int x = (int)((cx - 0.5 * ow) * this.factors[b]);
                    int y = (int)((cy - 0.5 * oh) * this.factors[b]);
                    int width = (int)(ow * this.factors[b]);
                    int height = (int)(oh * this.factors[b]);
                    Rect box = new Rect();
                    box.X = x;
                    box.Y = y;
                    box.Width = width;
                    box.Height = height;
                    position_boxes.Add(box);
                    class_ids.Add(max_classId_point.X);
                    confidences.Add((float)max_score);
                }
            }

            // NMS non maximum suppression
            int[] indexes = new int[position_boxes.Count];
            CvDnn.NMSBoxes(position_boxes, confidences, this.det_thresh, this.det_nms_thresh, out indexes);
            DetResult re = new DetResult();
            // 
            for (int i = 0; i < indexes.Length; i++)
            {
                int index = indexes[i];
                re.add(class_ids[index], confidences[index], position_boxes[index]);
            }
            re_result.Add(re);
        }
        return re_result;
    }
}

guojin-yan commented 1 month ago

Change the code to:

private int m_output_length = 33600;

Mininggamer commented 1 month ago

Change the code to:
private int m_output_length = 33600;
Thanks, i found out in some case i need to change color code from bgr to rgb so model can detect it.

guojin-yan / OpenVINO-CSharp-API

Need help. OpenVino can't detect image but pytorch still can. #31