facebookresearch / ContactPose

Large dataset of hand-object contact, hand- and object-pose, and 2.9 M RGB-D grasp images.
http://contactpose.cc.gatech.edu/
MIT License
338 stars 33 forks source link

Frame transformation related problem #23

Closed RitchieQi closed 2 years ago

RitchieQi commented 2 years ago

Thanks a lot for the dataset! I have a question about the transformation from oth to cth. I tried to use the following transformation to get the 3D joint coordinates w.r.t. the camera, but the results do not seem to lie in the camera frame.

(np.linalg.inv(self._cTo[camera_name][frame_idx]) @ np.vstack(self._oX[frame_idx][hand_idx].T, np.ones(len(self._oX[frame_idx][hand_idx])))).T[:,:3]

Here is the output:

kinect2_left
(array([[0.59586708, 0.57183709, 0.27451977],
       [0.57346818, 0.55475388, 0.25108291],
       [0.55723432, 0.54825791, 0.22123297],
       [0.54819433, 0.56737579, 0.19576846],
       [0.53468998, 0.58279761, 0.18603394],
       [0.56498216, 0.59200997, 0.20027606],
       [0.53212793, 0.60429002, 0.17582905],
       [0.50474777, 0.60782087, 0.16985572],
       [0.48186989, 0.61010318, 0.17019179],
       [0.57017289, 0.6114515 , 0.20931277],
       [0.5364321 , 0.62565416, 0.18285898],
       [0.50638421, 0.63366412, 0.17872098],
       [0.48237124, 0.63816475, 0.18142182],
       [0.57398631, 0.62594793, 0.22155912],
       [0.54427922, 0.6428173 , 0.19770902],
       [0.51716611, 0.64726235, 0.19323866],
       [0.49550774, 0.6499683 , 0.19336389],
       [0.57512528, 0.63672196, 0.23706421],
       [0.55397417, 0.65150359, 0.21775499],
       [0.53727593, 0.65383735, 0.21031583],
       [0.52000723, 0.65770182, 0.20670111]]), 
array([[0.56802887, 0.60250985, 0.34944253],
       [0.53132207, 0.59137744, 0.34292538],
       [0.49572316, 0.60188097, 0.33573266],
       [0.47328403, 0.62413895, 0.32591359],
       [0.46454028, 0.64074109, 0.30549669],
       [0.49526903, 0.64764002, 0.3199572 ],
       [0.46599222, 0.66376217, 0.29363614],
       [0.45486717, 0.66508935, 0.2700846 ],
       [0.44823189, 0.663057  , 0.24861838],
       [0.51577966, 0.66108361, 0.31025374],
       [0.49215595, 0.67836392, 0.27566575],
       [0.48336023, 0.67504445, 0.24672853],
       [0.47885343, 0.67136749, 0.22352431],
       [0.53498583, 0.66422097, 0.30099225],
       [0.51555078, 0.68000147, 0.27003876],
       [0.50632494, 0.67517748, 0.24434139],
       [0.49955107, 0.67154132, 0.22512216],
       [0.55252829, 0.65772819, 0.29191584],
       [0.53725348, 0.66866654, 0.26480445],
       [0.52858907, 0.66907577, 0.24578504],
       [0.52081835, 0.66914294, 0.22914286]]))

kinect2_middle
(array([[ 0.25805524, -0.14330683, -0.70323838],
       [ 0.23755131, -0.15640598, -0.67583703],
       [ 0.21632717, -0.17810507, -0.65924064],
       [ 0.19305602, -0.19927904, -0.66952522],
       [ 0.17236613, -0.20404604, -0.67753551],
       [ 0.19974594, -0.2020707 , -0.69879029],
       [ 0.15872292, -0.21343358, -0.69480874],
       [ 0.13170234, -0.20923564, -0.68773154],
       [ 0.11109021, -0.20073987, -0.68210294],
       [ 0.199793  , -0.19625675, -0.7200688 ],
       [ 0.15662736, -0.20924095, -0.71722308],
       [ 0.1261851 , -0.20253697, -0.71368982],
       [ 0.10450785, -0.19151102, -0.71012645],
       [ 0.20154913, -0.1867693 , -0.73684871],
       [ 0.16171137, -0.19888559, -0.73828475],
       [ 0.13505946, -0.19341821, -0.7323986 ],
       [ 0.11527754, -0.1855755 , -0.72754161],
       [ 0.20344128, -0.17313965, -0.74982706],
       [ 0.17327953, -0.18403933, -0.75301739],
       [ 0.15551203, -0.18502268, -0.74822493],
       [ 0.13789903, -0.18229422, -0.74530153]]), 
array([[ 0.24636096, -0.06459424, -0.73481013],
       [ 0.21662407, -0.05698227, -0.71090262],
       [ 0.17943783, -0.05121156, -0.70726615],
       [ 0.14837729, -0.05308462, -0.71853917],
       [ 0.12793847, -0.06956868, -0.72745343],
       [ 0.1566435 , -0.06746089, -0.74675463],
       [ 0.1166679 , -0.08200674, -0.74722766],
       [ 0.09893953, -0.09997769, -0.74067378],
       [ 0.08705045, -0.11750248, -0.73289643],
       [ 0.16623452, -0.08441807, -0.76453212],
       [ 0.12806941, -0.10875236, -0.766602  ],
       [ 0.11239126, -0.13240845, -0.75563243],
       [ 0.10241367, -0.15226003, -0.74676509],
       [ 0.17873854, -0.10010551, -0.77240924],
       [ 0.14594503, -0.12251127, -0.77513529],
       [ 0.13150476, -0.14293707, -0.76317673],
       [ 0.12082502, -0.15825822, -0.75424971],
       [ 0.19350514, -0.11465806, -0.77084525],
       [ 0.16739107, -0.13480626, -0.77116211],
       [ 0.15360272, -0.14940971, -0.7653657 ],
       [ 0.14148368, -0.16210854, -0.75996026]]))

kinect2_right
(array([[ 0.22794149, -0.34004319,  0.55556067],
       [ 0.22134314, -0.30439049,  0.5608691 ],
       [ 0.21544726, -0.27515871,  0.57840468],
       [ 0.20176543, -0.26881977,  0.6078713 ],
       [ 0.18387391, -0.26669128,  0.62166709],
       [ 0.20256197, -0.29594279,  0.62101284],
       [ 0.17215723, -0.27376277,  0.6412972 ],
       [ 0.1481243 , -0.25900016,  0.64281529],
       [ 0.1275925 , -0.2492314 ,  0.63938954],
       [ 0.19557289, -0.31640372,  0.62538112],
       [ 0.16372857, -0.29391005,  0.64818318],
       [ 0.13486196, -0.28178748,  0.65016326],
       [ 0.11187605, -0.2742406 ,  0.64581987],
       [ 0.18949139, -0.33473868,  0.62415392],
       [ 0.15928782, -0.31748448,  0.64708754],
       [ 0.13478475, -0.3043036 ,  0.64793996],
       [ 0.11509513, -0.29530475,  0.64515381],
       [ 0.18258538, -0.35112857,  0.61771471],
       [ 0.16000008, -0.33934967,  0.63745995],
       [ 0.14581676, -0.3283236 ,  0.64156818],
       [ 0.12979234, -0.32017404,  0.64330348]]), 
array([[ 0.17745971, -0.38850173,  0.50623965],
       [ 0.15319552, -0.3591347 ,  0.49832638],
       [ 0.11881537, -0.34414638,  0.50309058],
       [ 0.08987343, -0.34198408,  0.51899765],
       [ 0.07698495, -0.33698899,  0.54303658],
       [ 0.0972012 , -0.36506344,  0.54101507],
       [ 0.06811461, -0.34614034,  0.56562686],
       [ 0.06155757, -0.32815589,  0.58334037],
       [ 0.06020955, -0.31139042,  0.59837558],
       [ 0.1090279 , -0.37868767,  0.5602526 ],
       [ 0.08534843, -0.35869282,  0.59330569],
       [ 0.0839557 , -0.33578626,  0.61328308],
       [ 0.08551082, -0.31802657,  0.62923432],
       [ 0.12499995, -0.38514145,  0.57320395],
       [ 0.10508567, -0.36830961,  0.60328674],
       [ 0.10362195, -0.34602734,  0.61972146],
       [ 0.10264829, -0.32940694,  0.63202187],
       [ 0.14450407, -0.38454387,  0.58038083],
       [ 0.13002273, -0.3687835 ,  0.60547856],
       [ 0.12533885, -0.35398503,  0.61948015],
       [ 0.12123062, -0.34075045,  0.63153497]]))

The L2 norms of the coordinates look reasonable, but their magnitude is distributed mainly along different axes in the three cameras, i.e. the x and y axes in kinect2_left, the z axis in kinect2_middle, and the y and z axes in kinect2_right. I thought this might be because the coordinates are in the world frame or the object frame. How can I rotate the coordinates so that they lie in the camera frame? Thanks in advance.

samarth-robo commented 2 years ago

Hi @RitchieQi I think you should use cTo instead of its inverse. For example, see how they are projected into the image, using the P matrix that uses cTo.

RitchieQi commented 2 years ago

> Hi @RitchieQi I think you should use cTo instead of its inverse. For example, see how they are projected into the image, using the P matrix that uses cTo.

That's right! They look much better now. I'll post the new results below. Thanks a lot @samarth-robo.

kinect2_left
(array([[0.2565368 , 0.01868199, 0.78818366],
       [0.22842491, 0.03648041, 0.77282884],
       [0.21203477, 0.06118771, 0.75500561],
       [0.21846481, 0.09319641, 0.74955093],
       [0.21917481, 0.11492926, 0.75604247],
       [0.24759545, 0.09501801, 0.7571061 ],
       [0.23200948, 0.13471798, 0.76008047],
       [0.21448142, 0.15452287, 0.76999785],
       [0.19939909, 0.16714065, 0.78191566],
       [0.26474347, 0.09687327, 0.77085683],
       [0.2498228 , 0.13940964, 0.77368364],
       [0.23343277, 0.16176708, 0.78836935],
       [0.21905541, 0.17457946, 0.80364374],
       [0.27751058, 0.09459598, 0.7852261 ],
       [0.26736489, 0.13478804, 0.78941864],
       [0.25066427, 0.15394642, 0.80077245],
       [0.23675912, 0.1663165 , 0.81217643],
       [0.28579163, 0.08947422, 0.80144266],
       [0.28045894, 0.12115242, 0.80404396],
       [0.26987753, 0.13587141, 0.80736111],
       [0.25994629, 0.14914803, 0.81452424]]), 
array([[ 0.25761845, -0.00075087,  0.87155271],
       [ 0.22324412,  0.01619884,  0.87825387],
       [ 0.20451226,  0.04477038,  0.89444419],
       [ 0.20340833,  0.07479892,  0.90831553],
       [ 0.208349  ,  0.10191222,  0.90525083],
       [ 0.23550158,  0.08037886,  0.90515215],
       [ 0.22514652,  0.12160263,  0.90694867],
       [ 0.21786524,  0.1437413 ,  0.89524117],
       [ 0.21155565,  0.16038401,  0.88137881],
       [ 0.25960878,  0.08376332,  0.89500518],
       [ 0.2541285 ,  0.12828483,  0.88861619],
       [ 0.24533364,  0.15032991,  0.8695791 ],
       [ 0.23943801,  0.16616535,  0.85264546],
       [ 0.27569999,  0.08192526,  0.88078582],
       [ 0.27225402,  0.12107425,  0.87443396],
       [ 0.2621274 ,  0.14035159,  0.8572707 ],
       [ 0.25462609,  0.1546899 ,  0.84436221],
       [ 0.28397639,  0.0755856 ,  0.86279735],
       [ 0.28025256,  0.10742515,  0.85502749],
       [ 0.27415077,  0.12478211,  0.84510368],
       [ 0.26847438,  0.13990789,  0.83636642]]))

kinect2_middle
(array([[-0.01520219,  0.10222265,  0.8090005 ],
       [-0.0162786 ,  0.13593104,  0.82333293],
       [-0.01851367,  0.1704481 ,  0.82390933],
       [-0.0104938 ,  0.19400699,  0.80208467],
       [ 0.00458521,  0.207032  ,  0.79122494],
       [-0.00794769,  0.17816841,  0.77655724],
       [ 0.01459748,  0.21400931,  0.77064099],
       [ 0.0349519 ,  0.23260311,  0.77678828],
       [ 0.05353353,  0.24371669,  0.78452976],
       [ 0.00211866,  0.1651506 ,  0.76186764],
       [ 0.02565528,  0.20287952,  0.75396092],
       [ 0.05114471,  0.22066171,  0.75823189],
       [ 0.07265997,  0.23004402,  0.76552835],
       [ 0.01167445,  0.15080424,  0.75306252],
       [ 0.03459074,  0.18389024,  0.74228875],
       [ 0.05579403,  0.20094636,  0.74815154],
       [ 0.07361436,  0.21155533,  0.75495659],
       [ 0.02238462,  0.13558212,  0.74969144],
       [ 0.03937418,  0.16072768,  0.73883934],
       [ 0.05050351,  0.17526016,  0.74097555],
       [ 0.06429605,  0.18663048,  0.74356209]]), 
array([[0.0498793 , 0.04871141, 0.82417121],
       [0.06899666, 0.07457851, 0.84606214],
       [0.09891727, 0.09744629, 0.84940336],
       [0.12443949, 0.11416898, 0.83658485],
       [0.13269411, 0.13364701, 0.81865717],
       [0.11864191, 0.10469601, 0.80579897],
       [0.13992388, 0.13975992, 0.79450942],
       [0.14043811, 0.16523568, 0.78894842],
       [0.13649047, 0.18711124, 0.78509724],
       [0.1070642 , 0.1005509 , 0.78246771],
       [0.12171849, 0.13956645, 0.76468974],
       [0.11596656, 0.16905977, 0.75991423],
       [0.10888166, 0.19154697, 0.75586485],
       [0.0909683 , 0.0981304 , 0.76834119],
       [0.10297304, 0.13212856, 0.75146293],
       [0.09789879, 0.15930487, 0.74935528],
       [0.0939933 , 0.17956951, 0.74774986],
       [0.07090213, 0.09774234, 0.76291224],
       [0.07851869, 0.12702585, 0.74977987],
       [0.07832957, 0.14752744, 0.7457023 ],
       [0.07814667, 0.16560386, 0.74245265]]))

kinect2_right
(array([[-0.25512046,  0.04683273,  0.71553231],
       [-0.23143298,  0.06248791,  0.69236657],
       [-0.21851377,  0.08659038,  0.67117884],
       [-0.23244842,  0.10994204,  0.65230608],
       [-0.24373357,  0.11674542,  0.63383122],
       [-0.26022019,  0.11152152,  0.66393478],
       [-0.26299806,  0.12724062,  0.62427309],
       [-0.26044514,  0.12605356,  0.59616803],
       [-0.25827631,  0.11988907,  0.57412229],
       [-0.28120972,  0.10538314,  0.66682249],
       [-0.28545727,  0.12292282,  0.62541865],
       [-0.2869633 ,  0.11954875,  0.59426571],
       [-0.28700882,  0.11094756,  0.57123985],
       [-0.29748528,  0.0954857 ,  0.67025878],
       [-0.30541276,  0.11173808,  0.63272308],
       [-0.30397574,  0.10923003,  0.6050372 ],
       [-0.30243151,  0.10361384,  0.58400159],
       [-0.30999255,  0.0815187 ,  0.67276404],
       [-0.31806749,  0.09550498,  0.64487333],
       [-0.31624804,  0.09844877,  0.62677243],
       [-0.31625092,  0.09765482,  0.60872865]]), 
array([[-0.28826137, -0.03070616,  0.70074706],
       [-0.26955358, -0.03471974,  0.66686929],
       [-0.27206112, -0.03644813,  0.62918551],
       [-0.28826605, -0.0314749 ,  0.60075994],
       [-0.30038989, -0.01306399,  0.58393779],
       [-0.31473244, -0.01852786,  0.6150086 ],
       [-0.32173   ,  0.00017063,  0.57744193],
       [-0.31814913,  0.0200287 ,  0.56091808],
       [-0.31240563,  0.03884348,  0.54987397],
       [-0.33068217, -0.00298232,  0.62913357],
       [-0.33894856,  0.02522917,  0.59465464],
       [-0.3306701 ,  0.05059504,  0.58003322],
       [-0.32353594,  0.07153824,  0.57093634],
       [-0.33638947,  0.01115583,  0.64436626],
       [-0.34442559,  0.03686869,  0.61505529],
       [-0.33497208,  0.05890795,  0.60113968],
       [-0.32789865,  0.07542251,  0.59085757],
       [-0.33241378,  0.02408209,  0.66015806],
       [-0.33698181,  0.0468812 ,  0.63676302],
       [-0.33350648,  0.06296034,  0.62386492],
       [-0.33014545,  0.07696217,  0.61246305]]))