Celebrandil / CudaSift

A CUDA implementation of SIFT for NVidia GPUs (1.2 ms on a GTX 1060)
MIT License
865 stars 286 forks source link

Spurious descriptors with same value for all dimensions returned #90

Open matlabbe opened 3 months ago

matlabbe commented 3 months ago

Here is an example of the descriptors with indices 3, 4 and 5, extracted from the image below:

[...]
3 (866.034,395.743): [0.018431012, 0.022654165, 0.001292896, 0.00018135307, 0.00039497035, 0.00021760317, 0.0010791528, 
0.0021296688, 0.27287096, 0.27287096, 0.0065095401, 0, 1.288048e-05, 0.00017728232, 0.0032084715, 0.03477864, 
0.10331004, 0.27287096, 0.069609277, 0.00543859, 0.015576935, 0.0059009665, 0.0084429942, 0.030799719, 0.013091905, 
0.038849294, 0.026327742, 0.12741747, 0.065523386, 0.020241242, 0.021733649, 0.0070524537, 0.067081459, 0.081882581, 
0.011977402, 0.0002894534, 0.00054736191, 0.0011503156, 0.0013343397, 0.015055708, 0.27287096, 0.22552963, 
0.0046523307, 0.00016213191, 0.00018347373, 0.0030403808, 0.016056504, 0.16623564, 0.12662053, 0.065604091, 
0.011207361, 0.0024317687, 0.026926406, 0.07826148, 0.11381682, 0.10824063, 0.019543832, 0.037653793, 0.027442833, 
0.01186925, 0.044591628, 0.061915424, 0.055153154, 0.06278652, 0.015304182, 0.13441116, 0.012987037, 0.001891539, 
0.0020503632, 0.10972037, 0.0095784878, 0.0074747228, 0.20747438, 0.043018978, 0.004114273, 0.0011349234, 0.0015950568, 
0.064518824, 0.061598163, 0.27287096, 0.098229736, 0.0043143849, 0.00025641691, 0.0016941755, 0.015329037, 0.115928, 
0.13709423, 0.27287096, 0.092894562, 0.019033106, 0.0017949182, 0.00050019845, 0.0056443946, 0.037410554, 0.059109848, 
0.15061855, 3.9306593e-05, 0.0098794363, 8.1786347e-05, 0, 0, 0.11689584, 0.0017364604, 6.7152949e-05, 0.10730109, 
0.02577509, 0.00022224571, 0, 0.00060806476, 0.019309107, 0.0058838837, 0.11420429, 0.11735748, 0.0073898504, 
1.9566156e-05, 0.011859228, 0.0060749813, 0.024012648, 0.10699156, 0.27287096, 0.081037298, 0.010524067, 0, 0.0016941755, 
0.00025978978, 0.046104565, 0.164078, 0.12124742]

4 (1221.03,44.6283): [0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 0.088388346, 
0.088388346, 0.088388346, 0.088388346, 0.088388346]

5 (884.065,449.743): [0, 0, 0.13537027, 0.0033544558, 0, 0, 0.033662248, 0.0011577448, 0.004906931, 0.011771171, 0.14170672, 
0.039704446, 0, 0, 0.036057834, 0.0056075677, 0.049803764, 0.26877078, 0.10042188, 0.026234511, 0, 0, 0.0038559283, 
0.0063854186, 0.026438788, 0.26877078, 0.20651931, 0.0045134551, 0, 0, 0, 0.0010076567, 0, 6.738584e-06, 0.017946303, 
0.00074194063, 0, 0, 0.16726051, 0.0034812309, 0.093496121, 0.018124178, 0.020389127, 0.012388289, 0, 0, 0.18586257, 
0.049181066, 0.26877078, 0.26877078, 0.05231693, 0.0077746594, 0, 9.0443988e-05, 0.022688275, 0.086045437, 0.13326915, 
0.26877078, 0.028001824, 0, 0, 0.00063310791, 0.039567344, 0.06876301, 0, 1.9160625e-05, 0.049874086, 0.00093072135, 0, 0, 
0.0086138248, 2.0144742e-05, 0.16387022, 0.19095454, 0.035228197, 0.00061852741, 0.00043775453, 0.00097662851, 
0.010135029, 0.0068303193, 0.26877078, 0.26877078, 0.0067989454, 0.0001015966, 0.0030642815, 0.011715915, 0.057399314, 
0.146393, 0.13666546, 0.021549553, 0.00031222895, 1.4513802e-05, 0, 0.012945466, 0.13887441, 0.10978649, 0, 
0.00016424547, 0.078159578, 0.0014171483, 0, 0, 0, 0, 0.17956674, 0.068812318, 0.052595567, 0.00093720644, 0.00072959083,
 0.0016443207, 0.0052100122, 0.0516482, 0.19253092, 0.084944114, 0.008682115, 0.00071117631, 0.005107136, 0.012116875, 
0.046686113, 0.13527805, 0.12833008, 0.10524935, 0.012637743, 0.0001015966, 0, 0.0012162442, 0.010251811, 0.018618299]
[...]

newcollege

Note how the descriptor with index=4 has the same value (always 0.088388346, or 243 4 181 61 if that float value is shown as 4 bytes) for all dimensions. That value is approximately 1/√128, i.e. what every component becomes when a constant 128-dimensional vector is normalized to unit length. This is causing issues with bag-of-words algorithms, where unrelated images get a high matching score because of it. How is this possible, or is it expected when the descriptor step fails? It feels like the memory is initialized to the same value but never touched again when the descriptor is created. Downstream, my current workaround is just to detect that kind of pattern and ignore the descriptor.

Here are the modifications to the mainSift.cpp example code needed to reproduce the issue with the image above:

diff --git a/mainSift.cpp b/mainSift.cpp
index 0e4567a..2d9455f 100644
--- a/mainSift.cpp
+++ b/mainSift.cpp
@@ -36,7 +36,7 @@ int main(int argc, char **argv)
     cv::imread("data/left.pgm", 0).convertTo(limg, CV_32FC1);
     cv::imread("data/righ.pgm", 0).convertTo(rimg, CV_32FC1);
   } else {
-    cv::imread("data/img1.png", 0).convertTo(limg, CV_32FC1);
+    cv::imread("data/newcollege.jpg", 0).convertTo(limg, CV_32FC1);
     cv::imread("data/img2.png", 0).convertTo(rimg, CV_32FC1);
   }
   //cv::flip(limg, rimg, -1);
@@ -65,6 +65,13 @@ int main(int argc, char **argv)
   float *memoryTmp = AllocSiftTempMemory(w, h, 5, false);
     for (int i=0;i<1000;i++) {
       ExtractSift(siftData1, img1, 5, initBlur, thresh, 0.0f, false, memoryTmp);
+      // TEST BEGIN: show descriptors of first image
+      for(int j=0; j<siftData1.numPts; ++j)
+      {
+        std::cout << j << " (" << siftData1.h_data[j].xpos << "," << siftData1.h_data[j].ypos << ")" << ": " << cv::Mat(1,128,CV_32FC1, siftData1.h_data[j].data) << std::endl << std::endl;
+      }
+      exit(1);
+      /// TEST END
       ExtractSift(siftData2, img2, 5, initBlur, thresh, 0.0f, false, memoryTmp);
     }
     FreeSiftTempMemory(memoryTmp);

The image resolution is 1280x480, so the issue may not be related to pitch padding, unlike the case described in this post: https://github.com/Celebrandil/CudaSift/issues/41#issuecomment-420625571