YicongHong opened 1 year ago
I am wondering whether producing high-quality top-down images requires the real images (the original MP3D images), but it seems that HM3D doesn't provide those viewpoint coordinates / images?
Or I could simply sample random points in the environment and use depth to project the RGB to a top-down view.
Edit: I just noticed that the HM3D dataset webpage (https://aihabitat.org/datasets/hm3d/) has very high-quality top-down RGB images, but they are at different scales (the environment sizes are different). May I know how I should scale each image, because I want to project a 3D position onto those maps? Many thanks!
It would be great if you could release all the top-down RGB images for each floor of every environment and specify the scale for each of them :D Thanks!!!
@YicongHong,
Thanks for your interest. We generated the top-down views using the orthographic camera from habitat-sim. Here is a short snippet to get raw, unscaled maps with a pixel resolution of 0.05m x 0.05m. You will need access to the get_floor_navigable_extents function from the metrics script. You can pass a glb_path to the render_topdown_views function to get the top-down images for each floor.
import os
import cv2
import math
import tqdm
import glob
import imageio
import argparse
import numpy as np
import habitat_sim
import multiprocessing as mp

from metrics import get_floor_navigable_extents

TOPDOWN_WIDTH = 1280


def make_ortho_habitat_configuration(scene_path):
    # simulator configuration
    backend_cfg = habitat_sim.SimulatorConfiguration()
    backend_cfg.scene_id = scene_path
    if habitat_sim.__version__ == "0.1.7":
        # agent configuration
        sensor_cfg = habitat_sim.SensorSpec()
        sensor_cfg.resolution = [4096, 4096]
        # Reference: src/esp/bindings/SensorBindings.cpp
        sensor_cfg.sensor_type = habitat_sim.SensorType.COLOR
        sensor_cfg.sensor_subtype = habitat_sim.SensorSubType.ORTHOGRAPHIC
        sensor_cfg.parameters['far'] = '1000'
        sensor_cfg.parameters['near'] = '0.01'
        sensor_cfg.parameters['fov'] = '90'
        sensor_cfg.parameters['ortho_scale'] = '0.05'
    else:
        sensor_cfg = habitat_sim.CameraSensorSpec()
        sensor_cfg.resolution = [4096, 4096]
        sensor_cfg.sensor_type = habitat_sim.SensorType.COLOR
        sensor_cfg.sensor_subtype = habitat_sim.SensorSubType.ORTHOGRAPHIC
        sensor_cfg.far = 1000.0
        sensor_cfg.near = 0.01
        sensor_cfg.hfov = 90
        sensor_cfg.ortho_scale = 0.05
        sensor_cfg.clear_color = [0., 0., 0., 0.]
    agent_cfg = habitat_sim.agent.AgentConfiguration()
    agent_cfg.sensor_specifications = [sensor_cfg]
    return habitat_sim.Configuration(backend_cfg, [agent_cfg])


def robust_load_ortho_sim(scene_path):
    sim_cfg = make_ortho_habitat_configuration(scene_path)
    hsim = habitat_sim.Simulator(sim_cfg)
    if not hsim.pathfinder.is_loaded:
        navmesh_settings = habitat_sim.NavMeshSettings()
        navmesh_settings.set_defaults()
        hsim.recompute_navmesh(hsim.pathfinder, navmesh_settings)
    return hsim


def get_downward_quaternion():
    """
    Given a unit vector u = ux i + uy j + uz k and a rotation angle theta,
    the corresponding quaternion is defined as follows:

        q = cos(theta/2) + (ux i + uy j + uz k) sin(theta/2)

    To get a downward rotation, rotate about either i or k,
    since j is upward in habitat.
    By default, the agent faces -Z. To turn this vector downward,
    rotate by -90 degrees about X.

    Output format:
        q - [x, y, z, w] elements of a unit quaternion

    Reference: https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation
    """
    # -90 degree rotation about x
    q = [-0.7071067, 0.0, 0.0, 0.7071067]
    return q


def render_topdown_views(glb_path):
    sim = robust_load_ortho_sim(glb_path)
    # Get floor extents
    floor_extents = get_floor_navigable_extents(sim)
    floor_extents = sorted(floor_extents, key=lambda x: x['mean'])
    navmesh_vertices = np.array(sim.pathfinder.build_navmesh_vertices())
    floor_images = []
    scene_cent = navmesh_vertices.mean(axis=0).tolist()
    for fext in floor_extents:
        # Get navmesh vertices from current floor
        mask = (
            (navmesh_vertices[:, 1] <= fext['max'] + 0.25) &
            (navmesh_vertices[:, 1] >= fext['min'] - 0.25)
        )
        fcent = np.median(navmesh_vertices[mask, :], axis=0).tolist()  # (3, )
        # Set agent state
        agent_position = [scene_cent[0], fcent[1] + 1.0, scene_cent[2]]
        agent_rotation = get_downward_quaternion()
        agent = sim.get_agent(0)
        new_state = agent.get_state()
        new_state.position = agent_position
        new_state.rotation = agent_rotation
        new_state.sensor_states = {}
        agent.set_state(new_state, True)
        # Get observations
        obs = sim.get_sensor_observations()
        floor_images.append(obs['rgba_camera'])
    # Concatenate images vertically
    floor_images = np.concatenate(floor_images, axis=0)
    sim.close()
    return floor_images
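Regarding the scaling question, here is a minimal sketch (not part of the original script) of how the snippet above could be used and how a 3D habitat position might be mapped onto the rendered maps. It assumes the snippet above is in scope, the 0.05 m-per-pixel resolution and 4096 x 4096 sensor from the configuration above, and that the camera looks straight down over the navmesh x/z centroid (the scene_cent computed inside render_topdown_views), so image columns follow world +X and image rows follow world +Z. world_to_topdown_pixel is a hypothetical helper introduced only for illustration.

METERS_PER_PIXEL = 0.05  # assumed from the pixel resolution stated above
IMAGE_SIZE = 4096        # sensor resolution used in make_ortho_habitat_configuration


def world_to_topdown_pixel(point, camera_center, floor_index=0):
    """
    Hypothetical helper: map an [x, y, z] habitat world position to (row, col)
    on the vertically stacked output of render_topdown_views. camera_center is
    the x/z centroid of the navmesh vertices, i.e. the scene_cent computed
    inside render_topdown_views.
    """
    col = (point[0] - camera_center[0]) / METERS_PER_PIXEL + IMAGE_SIZE / 2
    row = (point[2] - camera_center[2]) / METERS_PER_PIXEL + IMAGE_SIZE / 2
    # Floors are concatenated vertically, lowest floor first.
    row += floor_index * IMAGE_SIZE
    return int(round(row)), int(round(col))


if __name__ == "__main__":
    # Example usage: render one scene and save the stacked per-floor maps.
    glb_path = "path/to/scene.glb"  # placeholder path
    topdown = render_topdown_views(glb_path)
    imageio.imwrite("topdown_views.png", topdown)

Which floor a point belongs to can be decided by comparing its y-coordinate against the per-floor min/max values returned by get_floor_navigable_extents.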
Hi Santhosh, thank you very much for your reply and sharing the code!
Hi there,
Does the dataset have a top-down image for each floor in all environments?
Or is there a way to use the Habitat simulator to produce the top-down images?
Thanks!