xinge008 / Cylinder3D

Rank 1st in the leaderboard of SemanticKITTI semantic segmentation (both single-scan and multi-scan) (Nov. 2020) (CVPR2021 Oral)
Apache License 2.0

weights of conv3 in UpBlock exist NAN #43

Closed · cjyiiiing closed this issue 3 years ago

cjyiiiing commented 3 years ago

I downloaded the pretrained model for SemanticKITTI and printed all of its parameters. I found that the weights of conv3 in UpBlock always contain NaN values, like this:

cylinder_3d_spconv_seg.upBlock0.conv3.weight
tensor([[[[[-0.3146,  0.0267,  0.1047,  ...,  0.1638, -0.2747,  0.1784],
           [-0.8915, -0.2567, -2.1439,  ..., -0.0997, -0.1918, -0.8922],
           [-1.6583, -1.0413, -0.8019,  ...,  0.0865, -1.4436, -1.4832],
           ...,
           [ 1.4814,  0.8771, -2.7230,  ..., -0.1184,  2.6314,  0.6151],
           [ 0.5554, -0.6339, -0.3210,  ...,  1.5973,  0.8244,  0.9369],
           [ 0.3099, -0.3487, -0.9573,  ...,  1.8343,  2.0645, -1.4957]],

          [[ 0.2434,  0.6077,  0.2401,  ...,  0.6087,  0.2166,  0.3725],
           [-0.5271, -0.3993, -1.2025,  ...,  0.6850, -0.9096, -0.5193],
           [-1.7771,  0.8933, -0.4569,  ...,  0.0112, -0.6558, -0.3272],
           ...,
           [ 0.3616, -0.4162, -2.7452,  ..., -0.1293,  0.7318, -1.2711],
           [ 0.3186,  0.4266,  0.3109,  ...,  0.1906, -0.1706,  0.4632],
           [ 0.8191,  1.3683,  0.0891,  ...,  2.4128,  1.5446,  0.6645]],

          [[ 0.3023,  0.3807,  0.2493,  ..., -1.0415,  0.1056, -0.2603],
           [ 0.9599,  0.7323,  0.1478,  ...,  0.7111, -0.1298,  0.8966],
           [ 0.7723,  0.6026, -0.1770,  ...,  0.5635,  1.8141, -0.8636],
           ...,
           [-1.4419, -1.1362, -2.6401,  ...,  0.6550,  0.7862, -0.9430],
           [-0.9298,  0.6791, -1.7838,  ...,  0.9366,  0.2681, -1.3272],
           [ 1.1701,  1.1824, -1.6827,  ...,  2.5493,  0.9161,  0.3196]]],

         [[[ 0.4152, -0.6454,  1.0213,  ..., -0.5271,  0.8759,  0.3317],
           [ 0.0152, -0.8058, -0.5352,  ..., -2.3797,  0.0426,  0.2902],
           [-2.8191, -2.3472, -0.2284,  ...,  0.1618, -0.2339, -2.1502],
           ...,
           [ 1.9137, -0.4314, -0.4321,  ...,  1.2808,  1.4155,  2.1384],
           [ 0.5110, -0.2770, -0.5622,  ...,  1.9154, -1.1110, -0.0050],
           [-0.8146,  0.8423, -1.0469,  ...,  1.8412,  0.3185, -0.1781]],

          [[ 1.3330, -0.0630,  0.4452,  ...,  0.8059,  1.5876,  0.0179],
           [ 1.3694, -0.9363, -1.4790,  ...,  0.6796,  1.1329,  1.5354],
           [-1.9959, -0.3877, -0.4930,  ...,  0.6501, -0.9343,  0.1375],
           ...,
           [ 0.7912, -1.1728,  0.4941,  ..., -0.0465, -1.7027, -0.0237],
           [-1.0025,  1.3098, -0.0768,  ..., -0.4612, -3.4584, -1.4110],
           [-0.4183,  1.0766,  0.1007,  ...,  1.8095, -0.3510, -1.2115]],

          [[ 0.4813, -1.0362,  0.1347,  ..., -0.8623, -0.3301, -1.9667],
           [ 1.4352, -1.1810,  0.5029,  ...,  0.8198,  0.6615, -0.6821],
           [ 0.1441,  0.6090,  0.3326,  ...,  0.7842, -0.8729, -1.4596],
           ...,
           [-0.1876, -0.9668, -0.5698,  ..., -0.4510,  0.0355, -0.0828],
           [-0.8910, -0.0146, -1.5001,  ...,  0.1596,  0.1089, -0.6375],
           [ 0.9165,  0.4500, -1.9313,  ...,  1.6017, -0.0742,  0.9825]]],

         [[[-0.6726, -0.7895,  0.6720,  ...,  0.1075,  0.2664,  0.1012],
           [ 0.0861,  0.4065, -0.6354,  ...,  0.1761, -0.2696, -0.0994],
           [-1.8868, -1.5101, -0.5280,  ...,  1.5993,  0.7808, -0.4933],
           ...,
           [ 2.0131, -0.2112,  0.7153,  ...,  0.8866,  1.1386,  1.9173],
           [ 0.2099, -0.9653, -0.4816,  ...,  1.1221, -1.0017,  0.2882],
           [ 0.6900,  0.0487, -1.6519,  ...,  0.9704,  0.1471, -0.7587]],

          [[ 0.0492, -0.5954, -0.1301,  ...,  0.0145,  0.4446, -0.2605],
           [ 0.8087, -0.4472, -1.0978,  ...,  0.2893,  0.1475,  0.2756],
           [-0.8992, -0.6114, -1.1732,  ...,  0.4818,  0.1650, -0.3893],
           ...,
           [ 0.7977, -0.9973,  2.3349,  ...,  0.5786, -0.1661,  0.5659],
           [ 0.2593,  1.7855,  0.0986,  ...,  0.1885, -1.8025, -0.5972],
           [-0.3040,  0.7271, -0.2476,  ...,  0.4204,  1.4570, -1.3176]],

          [[ 0.6191, -0.9954, -0.2723,  ..., -1.9767,  0.3893, -1.4162],
           [ 1.4961, -1.1198, -0.1618,  ...,  0.9444,  0.0264,  0.3396],
           [ 1.1053,  1.5829,  1.0865,  ...,  0.8140,  0.0177, -0.9481],
           ...,
           [-0.9551,  0.2058,  0.6879,  ..., -1.3027, -0.5232, -0.9984],
           [-1.3750, -0.3583, -0.7769,  ...,  0.5739, -0.3673, -1.2868],
           [ 1.1416, -0.3617, -1.5134,  ...,  2.5882,  0.1847,  0.5558]]]],

        [[[[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]],

         [[[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]],

         [[[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]]],

        [[[[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]],

         [[[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]],

         [[[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

          [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           ...,
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
           [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]]]],
       device='cuda:0')

I'm very confused by this. Could you explain why it happens?
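For reference, a minimal sketch of how such a parameter check can be done (the checkpoint path is a placeholder, and the assumption that the file stores a state_dict, possibly nested under a key, may not match how Cylinder3D actually saves its weights):

```python
import torch

# Load the released checkpoint on CPU (path is a placeholder).
ckpt = torch.load("pretrained_semantickitti.pt", map_location="cpu")
# Some checkpoints nest the weights under a key such as "model_state_dict";
# this key name is an assumption, adjust it to the actual file layout.
state_dict = ckpt.get("model_state_dict", ckpt) if isinstance(ckpt, dict) else ckpt

# Report every parameter tensor that contains at least one NaN.
for name, tensor in state_dict.items():
    if torch.is_tensor(tensor) and torch.isnan(tensor).any():
        n_nan = int(torch.isnan(tensor).sum())
        print(f"{name}: {n_nan}/{tensor.numel()} values are NaN")
```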

xinge008 commented 3 years ago

It is strange, and I think it is an issue with spconv; you can refer to this issue.

cjyiiiing commented 3 years ago

> It is strange, and I think it is an issue with spconv; you can refer to this issue.

I read that issue and found no solution to this problem. I printed out the input and output of conv3 in UpBlock, and there are no NaN values. Does the NaN problem affect training?
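For completeness, a minimal sketch of how the inputs and outputs of those layers can be checked with forward hooks (assumptions: `model` is the already-constructed Cylinder3D network, the conv3 modules sit under names like `cylinder_3d_spconv_seg.upBlock0.conv3` as in the parameter names above, and spconv sparse tensors expose their values via `.features`):

```python
import torch

def make_nan_check_hook(name):
    # Print whether the features entering/leaving the module contain NaN.
    def hook(module, inputs, output):
        x = inputs[0]
        feats_in = x.features if hasattr(x, "features") else x
        feats_out = output.features if hasattr(output, "features") else output
        print(f"{name}: NaN in input={bool(torch.isnan(feats_in).any())}, "
              f"NaN in output={bool(torch.isnan(feats_out).any())}")
    return hook

# `model` is assumed to be the loaded Cylinder3D network.
for name, module in model.named_modules():
    if "upBlock" in name and name.endswith("conv3"):
        module.register_forward_hook(make_nan_check_hook(name))
# Run a single forward pass afterwards; each hook prints its result.
```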

xinge008 commented 3 years ago

From our empirical study, it does not.
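If the NaN values are still a concern, one defensive workaround (an assumption, not something shipped with this repo) is to replace them with zeros before loading the checkpoint; since those weights reportedly do not influence the results, this should be harmless:

```python
import torch

# Placeholder path; assumes the file stores a flat state_dict, adjust if nested.
ckpt = torch.load("pretrained_semantickitti.pt", map_location="cpu")
# Replace NaN entries with 0.0 in every parameter tensor (requires PyTorch >= 1.8).
clean = {k: torch.nan_to_num(v, nan=0.0) if torch.is_tensor(v) else v
         for k, v in ckpt.items()}
# model.load_state_dict(clean)  # `model` is the constructed Cylinder3D network
```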

cjyiiiing commented 3 years ago

Thanks!