Failure in pixel sampler when using depth-nerfacto

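I am attempting to train depth-nerfacto with the following transforms.json (note that the frames have mixed resolutions: 3840x2160 for sensor0 and 2560x1440 for the other sensors). Training fails in the pixel sampler with the TypeError shown in the traceback at the end of the log below.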

{
    "frames": [
        {
            "camera_angle_x": 1.6206331253051758,
            "camera_angle_y": 1.0682151317596436,
            "cx": 1922.9263916015625,
            "cy": 1095.8277587890625,
            "depth_file_path": "sensor0\\depth\\000001.png",
            "file_path": "sensor0\\rgb\\000001.png",
            "fl_x": 1826.620849609375,
            "fl_y": 1826.0267333984375,
            "h": 2160,
            "k1": 0,
            "k2": 0,
            "k3": 0,
            "k4": 0,
            "p1": 0,
            "p2": 0,
            "transform_matrix": [
                [
                    0.024848394095897675,
                    -0.3371071219444275,
                    0.9411383271217346,
                    1.4621940851211548
                ],
                [
                    0.9996904134750366,
                    0.009575003758072853,
                    -0.022964637726545334,
                    -0.03897222876548767
                ],
                [
                    -0.0012698600767180324,
                    0.9414176344871521,
                    0.33724066615104675,
                    1.6487826108932495
                ],
                [
                    0.0,
                    -0.0,
                    0.0,
                    1.0
                ]
            ],
            "w": 3840,
            "x_fov": 92.85543823242188,
            "y_fov": 61.204219818115234
        },
        {
            "camera_angle_x": 1.6187852621078491,
            "camera_angle_y": 1.066586971282959,
            "cx": 1282.184814453125,
            "cy": 726.3068237304688,
            "depth_file_path": "sensor1\\depth\\000001.png",
            "file_path": "sensor1\\rgb\\000001.png",
            "fl_x": 1220.0023193359375,
            "fl_y": 1219.6160888671875,
            "h": 1440,
            "k1": 0,
            "k2": 0,
            "k3": 0,
            "k4": 0,
            "p1": 0,
            "p2": 0,
            "transform_matrix": [
                [
                    0.7412542700767517,
                    -0.059115394949913025,
                    0.6686161160469055,
                    1.484533429145813
                ],
                [
                    0.6712237000465393,
                    0.06387709826231003,
                    -0.7384974360466003,
                    -1.4292242527008057
                ],
                [
                    0.0009473099489696324,
                    0.9962053298950195,
                    0.08702882379293442,
                    1.1107122898101807
                ],
                [
                    0.0,
                    -0.0,
                    0.0,
                    1.0
                ]
            ],
            "w": 2560,
            "x_fov": 92.74956512451172,
            "y_fov": 61.110931396484375
        },
        {
            "camera_angle_x": 1.6254974603652954,
            "camera_angle_y": 1.0724430084228516,
            "cx": 1279.8321533203125,
            "cy": 728.6819458007813,
            "depth_file_path": "sensor2\\depth\\000001.png",
            "file_path": "sensor2\\rgb\\000001.png",
            "fl_x": 1211.830078125,
            "fl_y": 1211.4990234375,
            "h": 1440,
            "k1": 0,
            "k2": 0,
            "k3": 0,
            "k4": 0,
            "p1": 0,
            "p2": 0,
            "transform_matrix": [
                [
                    0.675960123538971,
                    0.0888102799654007,
                    -0.7315672636032104,
                    -1.3041361570358276
                ],
                [
                    -0.7369378805160522,
                    0.08242889493703842,
                    -0.6709158420562744,
                    -1.35117769241333
                ],
                [
                    0.0007180602988228202,
                    0.9926319718360901,
                    0.12116630375385284,
                    1.1181082725524902
                ],
                [
                    0.0,
                    -0.0,
                    0.0,
                    1.0
                ]
            ],
            "w": 2560,
            "x_fov": 93.13414764404297,
            "y_fov": 61.44646072387695
        },
        {
            "camera_angle_x": 1.6216586828231812,
            "camera_angle_y": 1.0688626766204834,
            "cx": 1280.57275390625,
            "cy": 728.6508178710938,
            "depth_file_path": "sensor3\\depth\\000001.png",
            "file_path": "sensor3\\rgb\\000001.png",
            "fl_x": 1216.4974365234375,
            "fl_y": 1216.4522705078125,
            "h": 1440,
            "k1": 0,
            "k2": 0,
            "k3": 0,
            "k4": 0,
            "p1": 0,
            "p2": 0,
            "transform_matrix": [
                [
                    -0.7251179814338684,
                    0.0811152532696724,
                    -0.6838305592536926,
                    -1.235578179359436
                ],
                [
                    -0.688602864742279,
                    -0.09331951290369034,
                    0.7191088795661926,
                    1.4263954162597656
                ],
                [
                    -0.005484039429575205,
                    0.9923264384269714,
                    0.12352384626865387,
                    1.127084493637085
                ],
                [
                    0.0,
                    -0.0,
                    0.0,
                    1.0
                ]
            ],
            "w": 2560,
            "x_fov": 92.91419982910156,
            "y_fov": 61.2413215637207
        },
        {
            "camera_angle_x": 1.6239713430404663,
            "camera_angle_y": 1.0713775157928467,
            "cx": 1281.7987060546875,
            "cy": 730.7208251953125,
            "depth_file_path": "sensor4\\depth\\000001.png",
            "file_path": "sensor4\\rgb\\000001.png",
            "fl_x": 1213.68359375,
            "fl_y": 1212.969970703125,
            "h": 1440,
            "k1": 0,
            "k2": 0,
            "k3": 0,
            "k4": 0,
            "p1": 0,
            "p2": 0,
            "transform_matrix": [
                [
                    -0.704884946346283,
                    -0.06748456507921219,
                    0.7061041593551636,
                    1.5576385259628296
                ],
                [
                    0.7092986106872559,
                    -0.0590321384370327,
                    0.7024319767951965,
                    1.3530181646347046
                ],
                [
                    -0.005720482673496008,
                    0.9959723949432373,
                    0.08947757631540298,
                    1.1262444257736206
                ],
                [
                    0.0,
                    -0.0,
                    0.0,
                    1.0
                ]
            ],
            "w": 2560,
            "x_fov": 93.04670715332031,
            "y_fov": 61.38541030883789
        },
        {
            "camera_angle_x": 1.6218918561935425,
            "camera_angle_y": 1.069124698638916,
            "cx": 1278.5623779296875,
            "cy": 736.3524780273438,
            "depth_file_path": "sensor5\\depth\\000001.png",
            "file_path": "sensor5\\rgb\\000001.png",
            "fl_x": 1216.21337890625,
            "fl_y": 1216.088623046875,
            "h": 1440,
            "k1": 0,
            "k2": 0,
            "k3": 0,
            "k4": 0,
            "p1": 0,
            "p2": 0,
            "transform_matrix": [
                [
                    0.026518484577536583,
                    -0.9629524946212769,
                    0.2683640122413635,
                    0.6008144021034241
                ],
                [
                    0.9993513822555542,
                    0.032080307602882385,
                    0.016360348090529442,
                    0.005734376609325409
                ],
                [
                    -0.024363437667489052,
                    0.26775607466697693,
                    0.9631786346435547,
                    2.4697155952453613
                ],
                [
                    0.0,
                    -0.0,
                    0.0,
                    1.0
                ]
            ],
            "w": 2560,
            "x_fov": 92.92755889892578,
            "y_fov": 61.25633239746094
        }
    ]
}

PS C:\Users\tim\repos\nerfstudio> ns-train depth-nerfacto --data .\data\custom\MISENBERG_T04_06_03_12_11_06_Export_08_28_15_12_37\
C:\Users\tim\repos\nerfstudio\nerfstudio\utils\misc.py:184: RuntimeWarning: Windows does not yet support torch.compile and the performance will be affected.
  warnings.warn(
[13:23:19] Using --data alias for --data.pipeline.datamanager.data                                          train.py:231
──────────────────────────────────────────────────────── Config ────────────────────────────────────────────────────────
TrainerConfig(
    _target=<class 'nerfstudio.engine.trainer.Trainer'>,
    output_dir=WindowsPath('outputs'),
    method_name='depth-nerfacto',
    experiment_name=None,
    project_name='nerfstudio-project',
    timestamp='2023-08-30_132319',
    machine=MachineConfig(seed=42, num_devices=1, num_machines=1, machine_rank=0, dist_url='auto', device_type='cuda'),
    logging=LoggingConfig(
        relative_log_dir=WindowsPath('.'),
        steps_per_log=10,
        max_buffer_size=20,
        local_writer=LocalWriterConfig(
            _target=<class 'nerfstudio.utils.writer.LocalWriter'>,
            enable=True,
            stats_to_track=(
                <EventName.ITER_TRAIN_TIME: 'Train Iter (time)'>,
                <EventName.TRAIN_RAYS_PER_SEC: 'Train Rays / Sec'>,
                <EventName.CURR_TEST_PSNR: 'Test PSNR'>,
                <EventName.VIS_RAYS_PER_SEC: 'Vis Rays / Sec'>,
                <EventName.TEST_RAYS_PER_SEC: 'Test Rays / Sec'>,
                <EventName.ETA: 'ETA (time)'>
            ),
            max_log_size=10
        ),
        profiler='basic'
    ),
    viewer=ViewerConfig(
        relative_log_filename='viewer_log_filename.txt',
        websocket_port=None,
        websocket_port_default=7007,
        websocket_host='0.0.0.0',
        num_rays_per_chunk=32768,
        max_num_display_images=512,
        quit_on_train_completion=False,
        image_format='jpeg',
        jpeg_quality=90
    ),
    pipeline=VanillaPipelineConfig(
        _target=<class 'nerfstudio.pipelines.base_pipeline.VanillaPipeline'>,
        datamanager=VanillaDataManagerConfig(
            _target=nerfstudio.data.datamanagers.base_datamanager.VanillaDataManager[nerfstudio.data.datasets.depth_dataset.DepthDataset],
            data=WindowsPath('data/custom/MISENBERG_T04_06_03_12_11_06_Export_08_28_15_12_37'),
            camera_optimizer=CameraOptimizerConfig(
                _target=<class 'nerfstudio.cameras.camera_optimizers.CameraOptimizer'>,
                mode='SO3xR3',
                position_noise_std=0.0,
                orientation_noise_std=0.0,
                optimizer=AdamOptimizerConfig(
                    _target=<class 'torch.optim.adam.Adam'>,
                    lr=0.0006,
                    eps=1e-08,
                    max_norm=None,
                    weight_decay=0.01
                ),
                scheduler=ExponentialDecaySchedulerConfig(
                    _target=<class 'nerfstudio.engine.schedulers.ExponentialDecayScheduler'>,
                    lr_pre_warmup=1e-08,
                    lr_final=None,
                    warmup_steps=0,
                    max_steps=10000,
                    ramp='cosine'
                ),
                param_group='camera_opt'
            ),
            masks_on_gpu=False,
            images_on_gpu=False,
            dataparser=NerfstudioDataParserConfig(
                _target=<class 'nerfstudio.data.dataparsers.nerfstudio_dataparser.Nerfstudio'>,
                data=WindowsPath('.'),
                scale_factor=1.0,
                downscale_factor=None,
                scene_scale=1.0,
                orientation_method='up',
                center_method='poses',
                auto_scale_poses=True,
                eval_mode='fraction',
                train_split_fraction=0.9,
                eval_interval=8,
                depth_unit_scale_factor=0.001
            ),
            train_num_rays_per_batch=4096,
            train_num_images_to_sample_from=-1,
            train_num_times_to_repeat_images=-1,
            eval_num_rays_per_batch=4096,
            eval_num_images_to_sample_from=-1,
            eval_num_times_to_repeat_images=-1,
            eval_image_indices=(0,),
            collate_fn=<function nerfstudio_collate at 0x000001966C488430>,
            camera_res_scale_factor=1.0,
            patch_size=1,
            pixel_sampler=PairPixelSamplerConfig(
                _target=<class 'nerfstudio.data.pixel_samplers.PairPixelSampler'>,
                num_rays_per_batch=4096,
                keep_full_image=False,
                is_equirectangular=False,
                radius=2
            )
        ),
        model=DepthNerfactoModelConfig(
            _target=<class 'nerfstudio.models.depth_nerfacto.DepthNerfactoModel'>,
            enable_collider=True,
            collider_params={'near_plane': 2.0, 'far_plane': 6.0},
            loss_coefficients={'rgb_loss_coarse': 1.0, 'rgb_loss_fine': 1.0},
            eval_num_rays_per_chunk=32768,
            prompt=None,
            near_plane=0.05,
            far_plane=1000.0,
            background_color='last_sample',
            hidden_dim=64,
            hidden_dim_color=64,
            hidden_dim_transient=64,
            num_levels=16,
            base_res=16,
            max_res=2048,
            log2_hashmap_size=19,
            features_per_level=2,
            num_proposal_samples_per_ray=(256, 96),
            num_nerf_samples_per_ray=48,
            proposal_update_every=5,
            proposal_warmup=5000,
            num_proposal_iterations=2,
            use_same_proposal_network=False,
            proposal_net_args_list=[
                {'hidden_dim': 16, 'log2_hashmap_size': 17, 'num_levels': 5, 'max_res': 128, 'use_linear': False},
                {'hidden_dim': 16, 'log2_hashmap_size': 17, 'num_levels': 5, 'max_res': 256, 'use_linear': False}
            ],
            proposal_initial_sampler='piecewise',
            interlevel_loss_mult=1.0,
            distortion_loss_mult=0.002,
            orientation_loss_mult=0.0001,
            pred_normal_loss_mult=0.001,
            use_proposal_weight_anneal=True,
            use_average_appearance_embedding=True,
            proposal_weights_anneal_slope=10.0,
            proposal_weights_anneal_max_num_iters=1000,
            use_single_jitter=True,
            predict_normals=False,
            disable_scene_contraction=False,
            use_gradient_scaling=False,
            implementation='tcnn',
            appearance_embed_dim=32,
            depth_loss_mult=0.001,
            is_euclidean_depth=False,
            depth_sigma=0.01,
            should_decay_sigma=False,
            starting_depth_sigma=0.2,
            sigma_decay_rate=0.99985,
            depth_loss_type=<DepthLossType.DS_NERF: 1>
        )
    ),
    optimizers={
        'proposal_networks': {
            'optimizer': AdamOptimizerConfig(
                _target=<class 'torch.optim.adam.Adam'>,
                lr=0.01,
                eps=1e-15,
                max_norm=None,
                weight_decay=0
            ),
            'scheduler': None
        },
        'fields': {
            'optimizer': AdamOptimizerConfig(
                _target=<class 'torch.optim.adam.Adam'>,
                lr=0.01,
                eps=1e-15,
                max_norm=None,
                weight_decay=0
            ),
            'scheduler': None
        }
    },
    vis='viewer',
    data=WindowsPath('data/custom/MISENBERG_T04_06_03_12_11_06_Export_08_28_15_12_37'),
    prompt=None,
    relative_model_dir=WindowsPath('nerfstudio_models'),
    load_scheduler=True,
    steps_per_save=2000,
    steps_per_eval_batch=500,
    steps_per_eval_image=500,
    steps_per_eval_all_images=25000,
    max_num_iterations=30000,
    mixed_precision=True,
    use_grad_scaler=False,
    save_only_latest_checkpoint=True,
    load_dir=None,
    load_step=None,
    load_config=None,
    load_checkpoint=None,
    log_gradients=False,
    gradient_accumulation_steps=1
)
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
           Saving config to:                                                                    experiment_config.py:136
           outputs\MISENBERG_T04_06_03_12_11_06_Export_08_28_15_12_37\depth-nerfacto\2023-08-30
           _132319\config.yml
           Saving checkpoints to:                                                                         trainer.py:135
           outputs\MISENBERG_T04_06_03_12_11_06_Export_08_28_15_12_37\depth-nerfacto\2023-08-30_132319\ne
           rfstudio_models
           Auto image downscale factor of 1                                                 nerfstudio_dataparser.py:349
Variable resolution, using variable_res_collate
Setting up training dataset...
Caching all 6 images.
╭─────────────────────────────────────────── Viewer ───────────────────────────────────────────╮
│        ╷                                                                                     │
│   HTTP │ https://viewer.nerf.studio/versions/23-05-15-1/?websocket_url=ws://localhost:7007   │
│        ╵                                                                                     │
╰──────────────────────────────────────────────────────────────────────────────────────────────╯
[NOTE] Not running eval iterations since only viewer is enabled.
Use --vis {wandb, tensorboard, viewer+wandb, viewer+tensorboard} to run with eval.
No Nerfstudio checkpoint to load, so training from scratch.
Disabled tensorboard/wandb event writers
Printing profiling stats, from longest to shortest duration in seconds
VanillaPipeline.get_train_loss_dict: 0.0010
Trainer.train_iteration: 0.0010
Traceback (most recent call last):
  File "c:\users\tim\.pyenv\pyenv-win\versions\3.8.10\lib\runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\users\tim\.pyenv\pyenv-win\versions\3.8.10\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\tim\.pyenv\pyenv-win\versions\3.8.10\Scripts\ns-train.exe\__main__.py", line 7, in <module>
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\scripts\train.py", line 261, in entrypoint
    main(
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\scripts\train.py", line 246, in main
    launch(
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\scripts\train.py", line 189, in launch
    main_func(local_rank=0, world_size=world_size, config=config)
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\scripts\train.py", line 100, in train_loop
    trainer.train()
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\engine\trainer.py", line 255, in train
    loss, loss_dict, metrics_dict = self.train_iteration(step)
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\utils\profiler.py", line 127, in inner
    out = func(*args, **kwargs)
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\engine\trainer.py", line 474, in train_iteration
    _, loss_dict, metrics_dict = self.pipeline.get_train_loss_dict(step=step)
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\utils\profiler.py", line 127, in inner
    out = func(*args, **kwargs)
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\pipelines\base_pipeline.py", line 298, in get_train_loss_dict
    ray_bundle, batch = self.datamanager.next_train(step)
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\data\datamanagers\base_datamanager.py", line 540, in next_train
    batch = self.train_pixel_sampler.sample(image_batch)
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\data\pixel_samplers.py", line 270, in sample
    pixel_batch = self.collate_image_dataset_batch_list(
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\data\pixel_samplers.py", line 243, in collate_image_dataset_batch_list
    collated_batch = {
  File "C:\Users\tim\repos\nerfstudio\nerfstudio\data\pixel_samplers.py", line 244, in <dictcomp>
    key: value[c, y, x]
TypeError: list indices must be integers or slices, not tuple

nerfstudio-project / nerfstudio

Failure in pixel sampler when using depth-nerfacto #2386