EnoxSoftware / OpenCVForUnity

OpenCV for Unity (Unity Asset Plugin)
https://assetstore.unity.com/packages/tools/integration/opencv-for-unity-21088

Unable to use FlannBasedMatcher #187

Closed: Rombond closed this 5 months ago

Rombond commented 5 months ago

Hi, I have this Python script fully working, and I tried to implement it in Unity:

import cv2
import numpy as np

# Load the template image (the one to be detected)
template_image_path = 'PrintMe_resized.jpg'  # Replace with the path to your image
template = cv2.imread(template_image_path, cv2.IMREAD_GRAYSCALE)

# Initialize ORB with a high number of key points
orb = cv2.ORB_create(nfeatures=10000, edgeThreshold=10, patchSize=10)

# Find key points and descriptors in the template image
kp1, des1 = orb.detectAndCompute(template, None)
template_image = cv2.Mat(template)
print(len(kp1))
template_with_keypoints = cv2.drawKeypoints(template_image, kp1, None, color=(0, 255, 0))

# Parameters for FLANN
FLANN_INDEX_LSH = 6
index_params = dict(algorithm=FLANN_INDEX_LSH,
                    table_number=6,  # 12
                    key_size=12,     # 20
                    multi_probe_level=2)  # 2
search_params = dict(checks=100)

flann = cv2.FlannBasedMatcher(index_params, search_params)

# Capture the webcam video stream
cap = cv2.VideoCapture(0)

# Variables to store previous matches
previous_good_matches = None

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Find key points and descriptors in the frame
    kp2, des2 = orb.detectAndCompute(gray_frame, None)

    if des2 is not None and len(des2) > 1:
        # Find matches with k=2
        matches = flann.knnMatch(des1, des2, k=2)

        # Apply Lowe's ratio test
        good_matches = []
        for match in matches:
            if len(match) == 2:
                m, n = match
                if m.distance < 0.7 * n.distance:
                    good_matches.append(m)

        if len(good_matches) > 10:  # Threshold of sufficient matches
            src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
            dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)

            # Update previous matches
            previous_good_matches = good_matches
        elif previous_good_matches is not None:
            # Use previous matches if the new ones are not sufficient
            src_pts = np.float32([kp1[m.queryIdx].pt for m in previous_good_matches]).reshape(-1, 1, 2)
            dst_pts = np.float32([kp2[m.trainIdx].pt for m in previous_good_matches]).reshape(-1, 1, 2)
        else:
            # No current or previous matches yet: nothing to compute a homography from
            src_pts = dst_pts = None

        if src_pts is not None:
            # Compute the homography
            M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
            if M is not None:
                matchesMask = mask.ravel().tolist()

                h, w = template.shape
                pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
                dst = cv2.perspectiveTransform(pts, M)

                # Draw a rectangle around the detected object
                frame = cv2.polylines(frame, [np.int32(dst)], True, (0, 255, 0), 3, cv2.LINE_AA)

    # Display the frame
    cv2.imshow('Video Frame', frame)
    cv2.imshow('Template with Keypoints', template_with_keypoints)

    # Exit the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close the windows
cap.release()
cv2.destroyAllWindows()

At the moment I have this:

using UnityEngine;
using OpenCVForUnity.CoreModule;
using OpenCVForUnity.ObjdetectModule;
using OpenCVForUnity.ImgprocModule;
using OpenCVForUnity.UnityUtils;
using OpenCVForUnity.UnityUtils.Helper;
using OpenCVForUnity.Calib3dModule;
using System.Collections;
using System.Collections.Generic;
using OpenCVForUnity.Features2dModule;
using System.Linq;

[RequireComponent(typeof(WebCamTextureToMatHelper))]
public class CameraMatchingPose : MonoBehaviour
{
    float width = 1920f;
    float height = 1080f;

    WebCamTextureToMatHelper webCamTextureToMatHelper;
    ORB orb;
    Mat grayMat;
    Texture2D texture;
    Mat points = new Mat(4, 1, CvType.CV_64FC1);
    Mat camMatrix;
    MatOfDouble distCoeffs;

    List<DMatch> previousGoodMatches;
    FlannBasedMatcher flann;
    bool inited = false;
    MatOfKeyPoint kp1 = new MatOfKeyPoint();
    Mat des1 = new Mat();

    [Header("References")]
    public Camera ARCamera;
    [SerializeField] Trackable trackable;
    [Header("Detection")]
    [SerializeField] int nfeatures = 10000;
    [SerializeField] int minGoodMatches = 10;
    [SerializeField] int borderSize = 10;
    [Header("Debug")]
    [SerializeField] bool detected = false;

    // Start is called before the first frame update
    void Start()
    {
        if (trackable == null) { trackable = FindObjectOfType<Trackable>(); }
        orb = ORB.create(nfeatures);
        orb.setEdgeThreshold(borderSize);
        orb.setPatchSize(borderSize);
        flann = FlannBasedMatcher.create();

        webCamTextureToMatHelper = GetComponent<WebCamTextureToMatHelper>();
        points = new Mat(4, 1, CvType.CV_64FC1);
        webCamTextureToMatHelper.Initialize();
    }

    // Update is called once per frame
    void Update()
    {
        if (!inited && trackable.template != null)
        {
            orb.detectAndCompute(trackable.template, new Mat(), kp1, des1);
            List<Mat> descriptors = new List<Mat>(1)
            {
                des1.clone()
            };
            flann.add(descriptors);
            flann.train();
            inited = true;
        }
        if (webCamTextureToMatHelper.IsPlaying() && webCamTextureToMatHelper.DidUpdateThisFrame() && inited)
        {
            Mat rgbaMat = webCamTextureToMatHelper.GetMat();
            Utils.matToTexture2D(rgbaMat, texture);
            Imgproc.cvtColor(rgbaMat, grayMat, Imgproc.COLOR_RGBA2GRAY);

            MatOfKeyPoint kp2 = new MatOfKeyPoint();
            Mat des2 = new Mat();
            orb.detectAndCompute(grayMat, new Mat(), kp2, des2);

            // Mirror the Python guard: skip frames without enough descriptors
            if (des2.empty() || des2.rows() < 2)
                return;

            List<MatOfDMatch> matches = new List<MatOfDMatch>();
            flann.knnMatch(des2, matches, 2);

            List<DMatch> goodMatches = new List<DMatch>();
            foreach (MatOfDMatch match in matches)
            {
                var matchs = match.toArray();
                if (matchs.Length == 2 && matchs[0].distance < 0.7 * matchs[1].distance)
                    goodMatches.Add(matchs[0]);
            }

            // Extract the matching keypoints.
            // Note: des1 was added to the matcher as the train set and des2 is the
            // query, so trainIdx indexes the template keypoints (kp1) and queryIdx
            // indexes the frame keypoints (kp2).
            List<KeyPoint> kp1List = kp1.toList();
            List<KeyPoint> kp2List = kp2.toList();
            List<Point> srcPtsList = new List<Point>();
            List<Point> dstPtsList = new List<Point>();
            if (goodMatches.Count > minGoodMatches)
            {
                foreach (DMatch match in goodMatches)
                {
                    srcPtsList.Add(kp1List[match.trainIdx].pt);
                    dstPtsList.Add(kp2List[match.queryIdx].pt);
                }

                // Update the previous matches
                previousGoodMatches = goodMatches;
            }
            else if (previousGoodMatches != null)
            {
                foreach (DMatch match in previousGoodMatches)
                {
                    srcPtsList.Add(kp1List[match.trainIdx].pt);
                    dstPtsList.Add(kp2List[match.queryIdx].pt);
                }
            }
            // findHomography needs at least 4 point pairs
            if (srcPtsList.Count < 4)
                return;

            MatOfPoint2f srcPts = new MatOfPoint2f(srcPtsList.ToArray());
            MatOfPoint2f dstPts = new MatOfPoint2f(dstPtsList.ToArray());

            // Compute the homography
            Mat mask = new Mat();
            Mat M = Calib3d.findHomography(srcPts, dstPts, Calib3d.RANSAC, 5.0f, mask);

            if (!M.empty())
            {
                // Draw a rectangle around the detected object
                MatOfPoint2f templateCorners = new MatOfPoint2f(new Point(0, 0), new Point(0, trackable.template.rows()),
                    new Point(trackable.template.cols(), trackable.template.rows()), new Point(trackable.template.cols(), 0));
                MatOfPoint2f sceneCorners = new MatOfPoint2f();
                Core.perspectiveTransform(templateCorners, sceneCorners, M);
                Debug.Log(sceneCorners.dump());
                //Imgproc.polylines(frame, new List<MatOfPoint>() { new MatOfPoint(sceneCorners.toArray()) }, true, new Scalar(0, 255, 0), 3);
            }
        }
    }

    public void OnWebCamTextureToMatHelperInitialized()
    {
        Mat webCamTextureMat = webCamTextureToMatHelper.GetMat();

        texture = new Texture2D(webCamTextureMat.cols(), webCamTextureMat.rows(), TextureFormat.RGBA32, false);
        Utils.matToTexture2D(webCamTextureMat, texture);

        GetComponent<Renderer>().material.mainTexture = texture;

        grayMat = new Mat(webCamTextureMat.rows(), webCamTextureMat.cols(), CvType.CV_8UC1);

        gameObject.transform.localScale = new Vector3(webCamTextureMat.cols(), webCamTextureMat.rows(), 1);
        width = webCamTextureToMatHelper.GetMat().width();
        height = webCamTextureToMatHelper.GetMat().height();

        float imageSizeScale = 1.0f;
        float widthScale = (float)Screen.width / width;
        float heightScale = (float)Screen.height / height;
        if (widthScale < heightScale)
        {
            Camera.main.orthographicSize = (width * (float)Screen.height / (float)Screen.width) / 2;
            imageSizeScale = (float)Screen.height / (float)Screen.width;
        }
        else
        {
            Camera.main.orthographicSize = height / 2;
        }

        //set cameraparam
        int max_d = (int)Mathf.Max(width, height);
        double fx = max_d;
        double fy = max_d;
        double cx = width / 2.0f;
        double cy = height / 2.0f;
        camMatrix = new Mat(3, 3, CvType.CV_64FC1);
        camMatrix.put(0, 0, fx);
        camMatrix.put(0, 1, 0);
        camMatrix.put(0, 2, cx);
        camMatrix.put(1, 0, 0);
        camMatrix.put(1, 1, fy);
        camMatrix.put(1, 2, cy);
        camMatrix.put(2, 0, 0);
        camMatrix.put(2, 1, 0);
        camMatrix.put(2, 2, 1.0f);
        Debug.Log("camMatrix " + camMatrix.dump());

        distCoeffs = new MatOfDouble(0, 0, 0, 0);
        Debug.Log("distCoeffs " + distCoeffs.dump());

        //calibration camera
        Size imageSize = new Size(width * imageSizeScale, height * imageSizeScale);
        double apertureWidth = 0;
        double apertureHeight = 0;
        double[] fovx = new double[1];
        double[] fovy = new double[1];
        double[] focalLength = new double[1];
        Point principalPoint = new Point(0, 0);
        double[] aspectratio = new double[1];
        Calib3d.calibrationMatrixValues(camMatrix, imageSize, apertureWidth, apertureHeight, fovx, fovy, focalLength, principalPoint, aspectratio);

        //To convert the difference of the FOV value of the OpenCV and Unity. 
        double fovXScale = (2.0 * Mathf.Atan((float)(imageSize.width / (2.0 * fx)))) / (Mathf.Atan2((float)cx, (float)fx) + Mathf.Atan2((float)(imageSize.width - cx), (float)fx));
        double fovYScale = (2.0 * Mathf.Atan((float)(imageSize.height / (2.0 * fy)))) / (Mathf.Atan2((float)cy, (float)fy) + Mathf.Atan2((float)(imageSize.height - cy), (float)fy));
        if (widthScale < heightScale)
        {
            ARCamera.fieldOfView = (float)(fovx[0] * fovXScale);
        }
        else
        {
            ARCamera.fieldOfView = (float)(fovy[0] * fovYScale);
        }
    }

    public void OnWebCamTextureToMatHelperDisposed()
    {
        Debug.Log("OnWebCamTextureToMatHelperDisposed");

        if (grayMat != null)
            grayMat.Dispose();

        if (texture != null)
        {
            Texture2D.Destroy(texture);
            texture = null;
        }

        if (points != null)
            points.Dispose();

        camMatrix.Dispose();
        distCoeffs.Dispose();
    }

    public void OnWebCamTextureToMatHelperErrorOccurred(WebCamTextureToMatHelper.ErrorCode errorCode)
    {
        Debug.Log("OnWebCamTextureToMatHelperErrorOccurred " + errorCode);
    }
}

But when I run it, I always get an error at flann.knnMatch(des2, matches, 2);:

CvException: CvType.CV_32SC2 != m.type() || m.cols()!=1
Mat [ -1*-1*CV_8UC1, isCont=False, isSubmat=False, nativeObj=0x3168834043920, dataAddr=0x0 ]
OpenCVForUnity.UtilsModule.Converters.Mat_to_vector_vector_DMatch (OpenCVForUnity.CoreModule.Mat m, System.Collections.Generic.List`1[T] lvdm) (at Assets/OpenCVForUnity/org/opencv/utils/Converters.cs:688)
OpenCVForUnity.Features2dModule.DescriptorMatcher.knnMatch (OpenCVForUnity.CoreModule.Mat queryDescriptors, System.Collections.Generic.List`1[T] matches, System.Int32 k) (at Assets/OpenCVForUnity/org/opencv/features2d/DescriptorMatcher.cs:550)

Did I miss something?

EnoxSoftware commented 5 months ago

Thank you very much for reporting.

Could you try the attachment? FlannBasedMatcher_read_yml_ORB_Example.zip

This example is a C# conversion of the code on the following page, with the detector changed to ORB: https://docs.opencv.org/3.4/d5/d6f/tutorial_feature_flann_matcher.html
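
For readers without the attachment, the conversion presumably boils down to something like the sketch below; img1/img2 stand in for the template and scene Mats, conf.yml for the LSH parameter file from the zip, and the 0.7 ratio threshold comes from the tutorial:

ORB detector = ORB.create(10000);
MatOfKeyPoint kp1 = new MatOfKeyPoint(), kp2 = new MatOfKeyPoint();
Mat des1 = new Mat(), des2 = new Mat();
detector.detectAndCompute(img1, new Mat(), kp1, des1);
detector.detectAndCompute(img2, new Mat(), kp2, des2);

// FlannBasedMatcher needs LSH parameters to handle ORB's binary descriptors
FlannBasedMatcher matcher = FlannBasedMatcher.create();
matcher.read(Utils.getFilePath("conf.yml"));

List<MatOfDMatch> knnMatches = new List<MatOfDMatch>();
matcher.knnMatch(des1, des2, knnMatches, 2);

// Lowe's ratio test
List<DMatch> goodMatches = new List<DMatch>();
foreach (MatOfDMatch m in knnMatches)
{
    DMatch[] pair = m.toArray();
    if (pair.Length == 2 && pair[0].distance < 0.7f * pair[1].distance)
        goodMatches.Add(pair[0]);
}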

I assume the error in your code is due to incorrectly set FlannBasedMatcher parameters.

Python:

# Parameters for FLANN
FLANN_INDEX_LSH = 6
index_params = dict(algorithm=FLANN_INDEX_LSH,
                    table_number=6,  # 12
                    key_size=12,     # 20
                    multi_probe_level=2)  # 2
search_params = dict(checks=100)

flann = cv2.FlannBasedMatcher(index_params, search_params)

C#:

var flann = FlannBasedMatcher.create();
flann.read(Utils.getFilePath("conf.yml"));
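
For background, FlannBasedMatcher.create() defaults to a KD-tree index meant for float descriptors, while ORB produces binary descriptors, which is why the LSH parameters must be supplied via read(). If you ever need to regenerate conf.yml with different parameters, one option (not part of the attachment) is to write it out from the already-working Python script, since DescriptorMatcher.write serializes the index and search parameters to YAML:

# Write the matcher configuration (index and search params) to a YAML
# file that the C# side can load with FlannBasedMatcher.read().
flann = cv2.FlannBasedMatcher(index_params, search_params)
flann.write('conf.yml')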

Regards,
EnoxSoftware

Rombond commented 5 months ago

Oh yeah, it was the missing yml. Thank you for your help!

Rombond commented 5 months ago

Sorry to disturb you again, but there is still a discrepancy between the Python version and the C# one.

Python: [Screenshot 2024-05-31 174705]

C#: [Screenshot 2024-05-31 174644]

The Python script hasn't changed; the C# script changed a little: I used a Thread to process the frames, and I use a LineRenderer to show my "square".

Here is how I get each point:

pa = new Vector2((float)sceneCorners.get(1, 0)[0], (float)sceneCorners.get(1, 0)[1]);
pb = new Vector2((float)sceneCorners.get(2, 0)[0], (float)sceneCorners.get(2, 0)[1]);
pc = new Vector2((float)sceneCorners.get(3, 0)[0], (float)sceneCorners.get(3, 0)[1]);
pd = new Vector2((float)sceneCorners.get(0, 0)[0], (float)sceneCorners.get(0, 0)[1]);

EnoxSoftware commented 5 months ago

If you used Imgproc.polylines in the C# code to draw the square on the image, did you get the same results as in the Python code?

LineRenderer is a component for drawing lines in Unity's world 3D coordinate system, so it is different from OpenCV's image coordinate system, and the camera position and perspective also affect it. It cannot be used as a drop-in replacement.
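
If you do want to drive a LineRenderer, the image points first have to be mapped into the quad's world space. A minimal sketch, not from this thread's code, assuming the webcam quad sits at the world origin with localScale = (imageWidth, imageHeight, 1) as in the script above:

// Map an OpenCV pixel coordinate (origin top-left, y down) to a Unity
// world position on the webcam quad (origin at the quad center, y up).
Vector3 ImagePointToWorld(Vector2 p, float imageWidth, float imageHeight)
{
    return new Vector3(p.x - imageWidth / 2f, imageHeight / 2f - p.y, 0f);
}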

Rombond commented 5 months ago

I wasn't able to use polylines; it asks for a List<MatOfPoint>, but every time I create one the values are rounded. Here is how I did it in MatchPic.cs:

Mat H = Calib3d.findHomography(obj, scene, Calib3d.RANSAC, 5);

if (!H.empty())
{
    // Draw a rectangle around the detected object
    MatOfPoint2f templateCorners = new MatOfPoint2f(new Point(0, 0), new Point(0, 1023), new Point(1023, 1023), new Point(1023, 0));
    MatOfPoint2f sceneCorners = new MatOfPoint2f();
    Core.perspectiveTransform(templateCorners, sceneCorners, H);
    MatOfPoint test = new MatOfPoint(
        new Point(sceneCorners.get(0, 0)[0], sceneCorners.get(0, 0)[1]),
        new Point(sceneCorners.get(1, 0)[0], sceneCorners.get(1, 0)[1]),
        new Point(sceneCorners.get(2, 0)[0], sceneCorners.get(2, 0)[1]),
        new Point(sceneCorners.get(3, 0)[0], sceneCorners.get(3, 0)[1]));
    Debug.Log(sceneCorners.dump());
    List<MatOfPoint> lst = new List<MatOfPoint>
    {
        new MatOfPoint(new Point(sceneCorners.get(0, 0)[0], sceneCorners.get(0, 0)[1])),
        new MatOfPoint(new Point(sceneCorners.get(1, 0)[0], sceneCorners.get(1, 0)[1])),
        new MatOfPoint(new Point(sceneCorners.get(2, 0)[0], sceneCorners.get(2, 0)[1])),
        new MatOfPoint(new Point(sceneCorners.get(3, 0)[0], sceneCorners.get(3, 0)[1]))
    };
    Imgproc.polylines(img2Mat, lst, true, new Scalar(0, 255, 0), 10);
}

I also tried:

MatOfPoint test = new MatOfPoint(
    new Point(sceneCorners.get(0, 0)[0], sceneCorners.get(0, 0)[1]),
    new Point(sceneCorners.get(1, 0)[0], sceneCorners.get(1, 0)[1]),
    new Point(sceneCorners.get(2, 0)[0], sceneCorners.get(2, 0)[1]),
    new Point(sceneCorners.get(3, 0)[0], sceneCorners.get(3, 0)[1]));
List<MatOfPoint> lst = new List<MatOfPoint>
{
    test
};
Imgproc.polylines(img2Mat, lst, true, new Scalar(0, 255, 0), 10);

And:

MatOfPoint test = new MatOfPoint();
test.fromArray(sceneCorners.toArray());
List<MatOfPoint> lst = new List<MatOfPoint>
{
    test
};
Imgproc.polylines(img2Mat, lst, true, new Scalar(0, 255, 0), 10);

EnoxSoftware commented 5 months ago

The MatOfPoint type that the Imgproc.polylines method takes as an argument stores its values as int. And since Imgproc.polylines is a drawing function for an image Mat, rounding the point coordinates to int is not considered a problem. This is the specification of the Java version of OpenCV, from which our asset is derived.
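
As a side note, instead of listing each corner by hand, the float corners can be converted in one call. A minimal sketch, using sceneCorners and img2Mat from the snippet above; Mat.convertTo keeps the 2-channel layout and rounds each coordinate to int:

// Convert the CV_32FC2 scene corners to a CV_32SC2 MatOfPoint for drawing.
MatOfPoint sceneCornersInt = new MatOfPoint();
sceneCorners.convertTo(sceneCornersInt, CvType.CV_32S);
Imgproc.polylines(img2Mat, new List<MatOfPoint> { sceneCornersInt }, true, new Scalar(0, 255, 0), 10);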

Rombond commented 5 months ago

It works with polylines, so I think it's just an issue with the LineRenderer. Thanks for your help!