guojin-yan / OpenVINO-CSharp-API

OpenVINO wrapper for .NET.
Apache License 2.0
137 stars 35 forks source link

Need help. OpenVino can't detect image but pytorch still can. #31

Closed Mininggamer closed 1 month ago

Mininggamer commented 2 months ago

My OpenVino Class https://drive.google.com/file/d/1JkGhYVcMYk9i-jp7egxq2R9PXSWkvbrc/view?usp=sharing First And Second Model https://drive.google.com/file/d/1jDzB7avVJihtKK2lViNW0sI3SvKSqxdA/view?usp=sharing https://drive.google.com/file/d/1mnrbZc1MS41MN3fz74YxjjqYexs9Mk89/view?usp=sharing

guojin-yan commented 2 months ago

May I ask what is the output result of your program? Is there no result or is the result wrong. If the result is incorrect, you can check if there is a problem with the result processing.

Mininggamer commented 2 months ago

The second one return result but all result max score equal 0. Output size of second model is [1,6,33600] image

My process result func :

//categ_nums = 1; m_output_length = [1,6,33600] ;
// m_output_Result_Lenght  = m_output_length[0] * m_output_length[1] *m_output_length[2]
 Marshal.UnsafeAddrOfPinnedArrayElement(result, (4 + categ_nums) * m_output_Result_Lenght * b * 4);
 // Create OpenCV matrix for detection results of the current batch
 //4 mean: centerx,centery, width, height
 // 4 + categ_nums: 4 params of box and score of each classes.
 // 8400: total number of detection model ouputs per batch.
 // 
Mat result_data = new Mat(4 + categ_nums, m_output_length, MatType.CV_32F,
 Marshal.UnsafeAddrOfPinnedArrayElement(result, (4 + categ_nums) * m_output_length * b * 4), 4 * m_output_length);
 result_data = result_data.T(); // Transpose the matrix

 // // Lists to store positions, class IDs, and confidence scores
 List<Rect> position_boxes = new List<Rect>();
 List<int> class_ids = new List<int>();
 List<float> confidences = new List<float>();
 // Loop through each detection in the result data
 for (int i = 0; i < result_data.Rows; i++)
 {
     // Extract class scores for the current detection
     Mat classes_scores = new Mat(result_data, new Rect(4, i, categ_nums, 1));
     OpenCvSharp.Point max_classId_point, min_classId_point;
     double max_score, min_score;
     // Find the maximum score and its index
     Cv2.MinMaxLoc(classes_scores, out min_score, out max_score,
         out min_classId_point, out max_classId_point);
     // Confidence level between 0 ~ 1
     // Obtain identification box information
     //If the maximum score is above the threshold, process the detection
     if (max_score > 0.25)
     {
         Console.WriteLine(max_score);
         float cx = result_data.At<float>(i, 0);
         float cy = result_data.At<float>(i, 1);
         float ow = result_data.At<float>(i, 2);
         float oh = result_data.At<float>(i, 3);
         int x = (int)((cx - 0.5 * ow) * this.factors[b]);
         int y = (int)((cy - 0.5 * oh) * this.factors[b]);
         int width = (int)(ow * this.factors[b]);
         int height = (int)(oh * this.factors[b]);
         Rect box = new Rect();
         box.X = x;
         box.Y = y;
         box.Width = width;
         box.Height = height;
         position_boxes.Add(box);
         class_ids.Add(max_classId_point.X);
         confidences.Add((float)max_score);
     }
 }
Mininggamer commented 2 months ago

Here is my full code

public class OpenVinoModel
{
    Core core;
    Model model;
    CompiledModel compiled_model;
    InferRequest infer_request;
    private float[] factors;

    public static long[] input_size = { 1, 3, 640, 640 };
    public static int categ_nums = 2;
    private int m_output_length = 6048;
    private int m_output_Result_Lenght;
    public float det_thresh = 0.5f;
    public float det_nms_thresh = 0.5f;
    public float score = 0.5f;

    OpenVinoMetaDataParams MetaDataParams;

    public OpenVinoModel(string PathFileXML, string PathFileBin, string PathFileMetaData, int output_length = 6048,
        float score = 0.5f, int numsClass = 2, int Width = 640, int Height = 640, bool isMono = false)
    {
        this.m_output_length = output_length;
        this.score = score;
        categ_nums = numsClass;
        if (isMono)
        {
            input_size = new long[] { 1, 1, Width, Height };
        }
        else
        {
            input_size = new long[] { 1, 3, Width, Height };
        }

        if (!File.Exists(PathFileXML)) throw new FileNotFoundException("Not Found PathFileXML");
        if (!File.Exists(PathFileBin)) throw new FileNotFoundException("Not Found PathFileBin");
        //if (!File.Exists(PathFileMetaData)) throw new FileNotFoundException("Not Found PathFileMetaData");

        InitModel(PathFileXML, PathFileBin);
        //  InitParams(PathFileMetaData);
    }

    private void InitModel(string PathFileXML, string PathFileBin)
    {
        core = new Core();
        model = core.read_model(PathFileXML, PathFileBin);

        compiled_model = core.compile_model(model, "AUTO");
        infer_request = compiled_model.create_infer_request();

        Tensor input_tensor = infer_request.get_input_tensor();
        infer_request.infer();

        Tensor output_tensor = infer_request.get_output_tensor();
        input_size = input_tensor.get_shape().ToArray<long>();
        var output_size = output_tensor.get_shape().ToArray<long>();
        m_output_length = (int)output_size[2];
        m_output_Result_Lenght = (int) (output_size[2] * output_size[1] * output_size[0]);
    }

    public DetResult PredictToMat(Mat mat)
    {
        factors = new float[1];
        mat = OpenVinoSharp.Extensions.process.Resize.letterbox_img(mat, (int)input_size[2], out factors[0]);
        mat = letterbox_img(mat, (int)input_size[2], out factors[0]);
        mat = Normalize.run(mat, true);
        float[] input_data = Permute.run(mat);
        float[] output_data = infer(input_data);
      //  m_output_length = output_data.Length;
        return process_result(output_data, 1)[0];
        //4915200
    }

    public static Mat letterbox_img(Mat image, int length, out float scales)
    {
        int num = ((image.Cols > image.Rows) ? image.Cols : image.Rows);
        Mat mat = Mat.Zeros(num, num, MatType.CV_8UC3);
        mat *= 255.0;
        Rect roi = new Rect(0, 0, image.Cols, image.Rows);
        image.CopyTo(new Mat(mat, roi));
        Mat mat2 = new Mat();
        Cv2.Resize(mat, mat2, new OpenCvSharp.Size(length, length));
        scales = (float)num / (float)length;
        return mat2;
    }
    protected float[] infer(float[] input_data, long[] shape = null)
    {
        Tensor input_tensor = infer_request.get_input_tensor();
        var inputsize = input_tensor.get_size();
        if (shape != null)
            input_tensor.set_shape(new Shape(shape));
        input_tensor.set_data<float>(input_data);
        infer_request.infer();

        Tensor output_tensor = infer_request.get_output_tensor();
        string s = output_tensor.get_shape().to_string();
        float[] result = output_tensor.get_data<float>((int)output_tensor.get_size());
        return result;
    }
    public List<DetResult> process_result(float[] result, int batch)
    {
        // List to store results for each batch
        List<DetResult> re_result = new List<DetResult>();
        // Loop through each batch

        for (int b = 0; b < batch; ++b)
        {
            Marshal.UnsafeAddrOfPinnedArrayElement(result, (4 + categ_nums) * m_output_Result_Lenght * b * 4);
            // Create OpenCV matrix for detection results of the current batch
            //4 mean: centerx,centery, width, height
            // 4 + categ_nums: 4 params of box and score of each classes.
            // 8400: total number of detection model ouputs per batch.
            // 
            Mat result_data = new Mat(4 + categ_nums, m_output_length, MatType.CV_32F,
            Marshal.UnsafeAddrOfPinnedArrayElement(result, (4 + categ_nums) * m_output_length * b * 4), 4 * m_output_length);
            result_data = result_data.T(); // Transpose the matrix

            // // Lists to store positions, class IDs, and confidence scores
            List<Rect> position_boxes = new List<Rect>();
            List<int> class_ids = new List<int>();
            List<float> confidences = new List<float>();
            // Loop through each detection in the result data
            for (int i = 0; i < result_data.Rows; i++)
            {
                // Extract class scores for the current detection
                Mat classes_scores = new Mat(result_data, new Rect(4, i, categ_nums, 1));
                OpenCvSharp.Point max_classId_point, min_classId_point;
                double max_score, min_score;
                // Find the maximum score and its index
                Cv2.MinMaxLoc(classes_scores, out min_score, out max_score,
                    out min_classId_point, out max_classId_point);
                // Confidence level between 0 ~ 1
                // Obtain identification box information
                //If the maximum score is above the threshold, process the detection
                if (max_score > 0.25)
                {
                    Console.WriteLine(max_score);
                    float cx = result_data.At<float>(i, 0);
                    float cy = result_data.At<float>(i, 1);
                    float ow = result_data.At<float>(i, 2);
                    float oh = result_data.At<float>(i, 3);
                    int x = (int)((cx - 0.5 * ow) * this.factors[b]);
                    int y = (int)((cy - 0.5 * oh) * this.factors[b]);
                    int width = (int)(ow * this.factors[b]);
                    int height = (int)(oh * this.factors[b]);
                    Rect box = new Rect();
                    box.X = x;
                    box.Y = y;
                    box.Width = width;
                    box.Height = height;
                    position_boxes.Add(box);
                    class_ids.Add(max_classId_point.X);
                    confidences.Add((float)max_score);
                }
            }

            // NMS non maximum suppression
            int[] indexes = new int[position_boxes.Count];
            CvDnn.NMSBoxes(position_boxes, confidences, this.det_thresh, this.det_nms_thresh, out indexes);
            DetResult re = new DetResult();
            // 
            for (int i = 0; i < indexes.Length; i++)
            {
                int index = indexes[i];
                re.add(class_ids[index], confidences[index], position_boxes[index]);
            }
            re_result.Add(re);
        }
        return re_result;
    }
}
guojin-yan commented 1 month ago

Change the code to:

private int m_output_length = 33600;
Mininggamer commented 1 month ago

Change the code to:

private int m_output_length = 33600;

Thanks, i found out in some case i need to change color code from bgr to rgb so model can detect it.