NVlabs / Deep_Object_Pose

Deep Object Pose Estimation (DOPE) – ROS inference (CoRL 2018)
Other
1.03k stars 287 forks source link

DOPEWriter in Isaac Sim to feed to DOPE for training #344

Closed monajalal closed 7 months ago

monajalal commented 8 months ago

I have created a dataset using Isaac Sim and I have pairs of RGB and JSON files.

Here is what my JSON file looks like:

{
  "camera_data": {
    "camera_view_matrix": [
      [
        0.4557649965319727,
        0.21822865046511744,
        -0.8629336730319336,
        0.0
      ],
      [
        -0.8901001448916918,
        0.11174133685206154,
        -0.441854733710417,
        0.0
      ],
      [
        1.3877787807814454e-17,
        0.9694793085748078,
        0.2451731434013772,
        -0.0
      ],
      [
        4.66962210938927,
        0.08731271362121838,
        -5.367450779842339,
        0.9999999999999998
      ]
    ],
    "camera_projection_matrix": [
      [
        2.2906228624246854,
        0.0,
        0.0,
        0.0
      ],
      [
        0.0,
        4.042275639572974,
        0.0,
        0.0
      ],
      [
        0.0,
        0.0,
        1.0000001149011741e-07,
        -1.0
      ],
      [
        0.0,
        0.0,
        0.10000001149011742,
        0.0
      ]
    ],
    "width": 960,
    "height": 544,
    "intrinsics": {
      "fx": 1099.498930273687,
      "fy": 1099.498930273687,
      "cx": 480.0,
      "cy": 272.0
    }
  },
  "keypoint_order": [
    "Center",
    "LDB",
    "LDF",
    "LUB",
    "LUF",
    "RDB",
    "RDF",
    "RUB",
    "RUF"
  ],
  "objects": [
    {
      "label": "myclass",
      "prim_path": "/Replicator/Ref_Xform/Ref",
      "visibility": 1.0,
      "local_to_world_transform": [
        [
          0.008375376462936401,
          -0.005463795270770788,
          0.0,
          0.0
        ],
        [
          0.005463795270770788,
          0.008375376462936401,
          0.0,
          0.0
        ],
        [
          0.0,
          0.0,
          0.009999999776482582,
          0.0
        ],
        [
          -0.4219878315925598,
          7.990025520324707,
          0.0,
          1.0
        ]
      ],
      "location": [
        -0.3824009296950116,
        7.9652429379737155,
        -0.05367450659870465
      ],
      "quaternion_xyzw": [
        0.7625695511020774,
        -0.41105437272068857,
        0.4565607637177254,
        0.20266783521976267
      ],
      "projected_cuboid": [
        [
          141,
          158
        ],
        [
          89,
          173
        ],
        [
          87,
          156
        ],
        [
          59,
          153
        ],
        [
          58,
          138
        ],
        [
          227,
          163
        ],
        [
          226,
          147
        ],
        [
          184,
          144
        ],
        [
          183,
          130
        ]
      ]
    },
    {
      "label": "myclass",
      "prim_path": "/Replicator/Ref_Xform_03/Ref",
      "visibility": 1.0,
      "local_to_world_transform": [
        [
          0.00888086762279272,
          0.00459675770252943,
          0.0,
          0.0
        ],
        [
          -0.00459675770252943,
          0.00888086762279272,
          0.0,
          0.0
        ],
        [
          0.0,
          0.0,
          0.009999999776482582,
          0.0
        ],
        [
          -4.476489067077637,
          3.4983103275299072,
          0.0,
          1.0
        ]
      ],
      "location": [
        -4.4354201266645505,
        3.5205508615806003,
        -0.05367450659870465
      ],
      "quaternion_xyzw": [
        -0.556708207377697,
        0.5850625583417068,
        -0.18739058441119272,
        -0.5591623588359644
      ],
      "projected_cuboid": [
        [
          305,
          452
        ],
        [
          530,
          538
        ],
        [
          531,
          482
        ],
        [
          16,
          539
        ],
        [
          9,
          483
        ],
        [
          515,
          389
        ],
        [
          516,
          347
        ],
        [
          142,
          389
        ],
        [
          138,
          348
        ]
      ]
    }
  ]
}

Can you please guide me on how to feed this data to DOPE? I don't want to use ROS or Isaac ROS. I cannot find any direct connector.

Also, do you expect only one object of interest to be in the image, or are multiple instances of the same object allowed?