Possible bug in orientation labeling in nvisii_data_gen?

Hello, I trained DOPE on a custom dataset generated with the script in nvisii_data_gen, and the model outputs the belief maps as intended. However, during inference the predicted orientations do not match the labels well (the predicted locations do match well).

Here are two example images I synthesized using your script: [00621.png] 00621 [00622.png] 00622

The overlaid boxes are the model predictions (green) and the labels (red).

Here are the corresponding JSON labels: [00621.json]

{
    "camera_data": {
        "camera_look_at": {
            "at": [
                1.0,
                0.0,
                0.0
            ],
            "eye": [
                0.0,
                0.0,
                0.0
            ],
            "up": [
                0.0,
                0.0,
                1.0
            ]
        },
        "camera_view_matrix": [
            [
                0.0,
                0.0,
                1.0,
                0.0
            ],
            [
                -1.0,
                0.0,
                0.0,
                0.0
            ],
            [
                0.0,
                -1.0,
                0.0,
                0.0
            ],
            [
                0.0,
                0.0,
                0.0,
                1.0
            ]
        ],
        "height": 400,
        "intrinsics": {
            "cx": 200.0,
            "cy": 200.0,
            "fx": 482.84283447265625,
            "fy": 482.84283447265625
        },
        "location_worldframe": [
            -0.0,
            0.0,
            -0.0
        ],
        "quaternion_xyzw_worldframe": [
            -0.5,
            0.5,
            -0.5,
            0.5
        ],
        "width": 400
    },
    "objects": [
        {
            "bounding_box_minx_maxx_miny_maxy": [
                41,
                111,
                67,
                135
            ],
            "class": "turtlebot",
            "local_cuboid": null,
            "local_to_world_matrix": [
                [
                    0.006729187909513712,
                    -0.0053104218095541,
                    -0.005149509757757187,
                    -0.0
                ],
                [
                    -0.003909077495336533,
                    0.0033570986706763506,
                    -0.008570238016545773,
                    0.0
                ],
                [
                    0.006279899738729,
                    0.00778005737811327,
                    0.00018316967179998755,
                    -0.0
                ],
                [
                    1.537642478942871,
                    0.4071166217327118,
                    0.2701554596424103,
                    1.0
                ]
            ],
            "location": [
                -0.4071166217327118,
                -0.2701554596424103,
                1.537642478942871
            ],
            "location_worldframe": [
                1.537642478942871,
                0.4071166217327118,
                0.2701554596424103
            ],
            "name": "turtlebot",
            "projected_cuboid": [
                [
                    94.89465951919556,
                    93.67061853408813
                ],
                [
                    58.71387720108032,
                    58.25803279876709
                ],
                [
                    31.681668758392334,
                    102.83795595169067
                ],
                [
                    71.45127058029175,
                    136.62052154541016
                ],
                [
                    127.05178260803223,
                    87.65465021133423
                ],
                [
                    90.88515043258667,
                    49.269306659698486
                ],
                [
                    63.53034973144531,
                    96.68266773223877
                ],
                [
                    103.55539321899414,
                    133.25066566467285
                ],
                [
                    80.70358037948608,
                    95.03343105316162
                ]
            ],
            "provenance": "nvisii",
            "px_count_all": 0,
            "px_count_visib": 0,
            "quaternion_xyzw": [
                0.24471940100193024,
                -0.2521558105945587,
                0.249707892537117,
                0.25853845477104187
            ],
            "quaternion_xyzw_worldframe": [
                -0.00813349150121212,
                0.005685585550963879,
                -0.0006971018738113344,
                0.5025607943534851
            ],
            "segmentation_id": 2,
            "visibility": 1
        }
    ]
}

[00622.json]

{
    "camera_data": {
        "camera_look_at": {
            "at": [
                1.0,
                0.0,
                0.0
            ],
            "eye": [
                0.0,
                0.0,
                0.0
            ],
            "up": [
                0.0,
                0.0,
                1.0
            ]
        },
        "camera_view_matrix": [
            [
                0.0,
                0.0,
                1.0,
                0.0
            ],
            [
                -1.0,
                0.0,
                0.0,
                0.0
            ],
            [
                0.0,
                -1.0,
                0.0,
                0.0
            ],
            [
                0.0,
                0.0,
                0.0,
                1.0
            ]
        ],
        "height": 400,
        "intrinsics": {
            "cx": 200.0,
            "cy": 200.0,
            "fx": 482.84283447265625,
            "fy": 482.84283447265625
        },
        "location_worldframe": [
            -0.0,
            0.0,
            -0.0
        ],
        "quaternion_xyzw_worldframe": [
            -0.5,
            0.5,
            -0.5,
            0.5
        ],
        "width": 400
    },
    "objects": [
        {
            "bounding_box_minx_maxx_miny_maxy": [
                72,
                142,
                83,
                149
            ],
            "class": "turtlebot",
            "local_cuboid": null,
            "local_to_world_matrix": [
                [
                    0.0019451454281806946,
                    0.00398661382496357,
                    0.008962329477071762,
                    -0.0
                ],
                [
                    0.0028044083155691624,
                    0.00852944329380989,
                    -0.004402714315801859,
                    0.0
                ],
                [
                    -0.00939955934882164,
                    0.003369794460013509,
                    0.0005410914891399443,
                    -0.0
                ],
                [
                    1.5518096685409546,
                    0.3478415906429291,
                    0.24086709320545197,
                    1.0
                ]
            ],
            "location": [
                -0.3478415906429291,
                -0.24086709320545197,
                1.5518096685409546
            ],
            "location_worldframe": [
                1.5518096685409546,
                0.3478415906429291,
                0.24086709320545197
            ],
            "name": "turtlebot",
            "projected_cuboid": [
                [
                    110.9567403793335,
                    62.43329048156738
                ],
                [
                    133.4183692932129,
                    114.49103355407715
                ],
                [
                    81.35592937469482,
                    145.81623077392578
                ],
                [
                    60.92987060546875,
                    94.91168260574341
                ],
                [
                    137.22708225250244,
                    79.00468111038208
                ],
                [
                    157.97545909881592,
                    126.09975337982178
                ],
                [
                    110.24706363677979,
                    153.93446683883667
                ],
                [
                    91.18901491165161,
                    107.79221057891846
                ],
                [
                    110.58106422424316,
                    110.86838245391846
                ]
            ],
            "provenance": "nvisii",
            "px_count_all": 0,
            "px_count_visib": 0,
            "quaternion_xyzw": [
                0.25371211767196655,
                -0.25816500186920166,
                0.24516919255256653,
                0.24844633042812347
            ],
            "quaternion_xyzw_worldframe": [
                -0.003865025006234646,
                -0.009130791760981083,
                0.0005878737429156899,
                0.5027463436126709
            ],
            "segmentation_id": 2,
            "visibility": 1
        }
    ]
}

The objects in the two images have different orientations, but the quaternion_xyzw attributes in both labels are almost identical. Could this be a bug in the nvisii method used by export_to_ndds_file in nvisii_data_gen/utils.py? The nvisii version installed on my machine is 1.1.72.

NVlabs / Deep_Object_Pose

Possible bug in orientation labeling in nvisii_data_gen? #266