Traceback (most recent call last):
File "train.py", line 412, in <module>
trainer.fit(system, ckpt_path=hparams.ckpt_path)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 608, in fit
call._call_and_handle_interrupt(
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 38, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 650, in _fit_impl
self._run(model, ckpt_path=self.ckpt_path)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1112, in _run
results = self._run_stage()
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1191, in _run_stage
self._run_train()
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1214, in _run_train
self.fit_loop.run()
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 267, in advance
self._outputs = self.epoch_loop.run(self._data_fetcher)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 213, in advance
batch_output = self.batch_loop.run(kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 88, in advance
outputs = self.optimizer_loop.run(optimizers, kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/loop.py", line 199, in run
self.advance(*args, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 202, in advance
result = self._run_optimization(kwargs, self._optimizers[self.optim_progress.optimizer_position])
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 249, in _run_optimization
self._optimizer_step(optimizer, opt_idx, kwargs.get("batch_idx", 0), closure)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 370, in _optimizer_step
self.trainer._call_lightning_module_hook(
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1356, in _call_lightning_module_hook
output = fn(*args, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/core/module.py", line 1742, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/core/optimizer.py", line 169, in step
step_output = self._strategy.optimizer_step(self._optimizer, self._optimizer_idx, closure, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/strategies/strategy.py", line 234, in optimizer_step
return self.precision_plugin.optimizer_step(
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/native_amp.py", line 75, in optimizer_step
closure_result = closure()
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 149, in __call__
self._result = self.closure(*args, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 135, in closure
step_output = self._step_fn()
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 419, in _training_step
training_step_output = self.trainer._call_strategy_hook("training_step", *kwargs.values())
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1494, in _call_strategy_hook
output = fn(*args, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/pytorch_lightning/strategies/strategy.py", line 378, in training_step
return self.model.training_step(*args, **kwargs)
File "train.py", line 209, in training_step
results = self(batch, split='train')
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "train.py", line 123, in forward
return render(self.model, rays_o, rays_d, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/torch/autocast_mode.py", line 12, in decorate_autocast
return func(*args, **kwargs)
File "/root/shy/OmniSeg3D/models/rendering.py", line 35, in render
results = render_func(model, rays_o, rays_d, hits_t, **kwargs)
File "/root/shy/OmniSeg3D/models/rendering.py", line 196, in __render_rays_train
sigmas, rgbs = model(xyzs, dirs, **kwargs)
File "/root/anaconda3/envs/omniseg3d/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/root/shy/OmniSeg3D/models/networks.py", line 190, in forward
sigmas, h = self.density(x, return_feat=True)
File "/root/shy/OmniSeg3D/models/networks.py", line 150, in density
inside_aabb = ((x>=self.aabb_min-self.aabb_tol)&
RuntimeError: CUDA error: invalid configuration argument
The error occurs in models/networks.py:
def density(self, x, return_feat=False):
    """Evaluate the density field at world-space positions.

    Inputs:
        x: (N, 3) xyz in [-scale, scale]
        return_feat: whether to also return the intermediate feature h

    Outputs:
        sigmas: (N,) densities; (sigmas, h) when return_feat is True
    """
    # Guard: when every ray misses the occupancy grid, the ray marcher
    # produces zero samples and x arrives as an empty (0, 3) tensor.
    # Feeding that into the hash-grid encoder / fused MLP launches a CUDA
    # kernel with a zero-sized grid, which raises
    # "RuntimeError: CUDA error: invalid configuration argument".
    # Short-circuit with empty results instead of calling the kernels.
    if x.shape[0] == 0:
        sigmas = x.new_zeros(0)
        if return_feat:
            # NOTE(review): assumes sigma_net exposes its output width as
            # n_output_dims (tinycudann convention) — verify for this model.
            return sigmas, x.new_zeros(0, self.sigma_net.n_output_dims)
        return sigmas

    # Normalize positions to [0, 1] for the grid encoder.
    x1 = (x - self.xyz_min) / (self.xyz_max - self.xyz_min)
    e = self.xyz_encoder(x1)
    h = self.sigma_net(e)
    # Truncated exp keeps density non-negative while bounding the gradient.
    sigmas = TruncExp.apply(h[:, 0])

    # Zero out density outside the (slightly tolerant) axis-aligned box.
    inside_aabb = ((x >= self.aabb_min - self.aabb_tol) &
                   (x <= self.aabb_max + self.aabb_tol)).all(1)
    # zeros_like avoids the per-call host->device transfer of
    # torch.tensor(0.0).float().to(device) and preserves dtype/device.
    sigmas = torch.where(inside_aabb, sigmas, torch.zeros_like(sigmas))

    if return_feat:
        return sigmas, h
    return sigmas
because x is torch.tensor([], size=(0, 3)). And why is x empty? Because in models/rendering.py
the returned xyzs is empty, and so is dirs. And why are they empty? Because in models/custom_functions.py
@staticmethod
@custom_fwd(cast_inputs=torch.float32)
def forward(ctx, rays_o, rays_d, hits_t,
            density_bitfield, cascades, scale, exp_step_factor,
            grid_size, max_samples):
    """March rays through the occupancy grid and collect sample points.

    Delegates the actual marching to the CUDA extension
    vren.raymarching_train, then trims the pre-allocated output buffers
    down to the number of samples actually produced.
    """
    # Random offset for the first sample of every ray (anti-aliasing jitter).
    noise = torch.rand_like(rays_o[:, 0])

    rays_a, xyzs, dirs, deltas, ts, counter = vren.raymarching_train(
        rays_o, rays_d, hits_t,
        density_bitfield, cascades, scale,
        exp_step_factor, noise, grid_size, max_samples)

    # counter[0] holds the total sample count over all rays; the CUDA
    # kernel over-allocates, so drop the unused tail of each buffer.
    total_samples = counter[0]
    xyzs, dirs, deltas, ts = (buf[:total_samples]
                              for buf in (xyzs, dirs, deltas, ts))

    ctx.save_for_backward(rays_a, ts)
    return rays_a, xyzs, dirs, deltas, ts, total_samples
total_samples is zero, so the slices produce empty tensors. And why does vren.raymarching_train return counter[0] == 0?
Because in models/csrc/raymarching.cu
// first pass: compute the number of samples on the ray
float t = t1; int N_samples = 0;
// if t1 < 0 (no hit) this loop will be skipped (N_samples will be 0)
while (0<=t && t<t2 && N_samples<max_samples){
const float x = ox+t*dx, y = oy+t*dy, z = oz+t*dz;
...
there is no hit, so the sample count is 0.
My dataset is a single object with an empty background, so some rays hit nothing.
But why does this raise a RuntimeError? I think NeRF should be able to handle this situation.
When I train on a custom dataset, I get the error shown above.
The error occurs in models/networks.py because x is torch.tensor([], size=(0, 3)).
And why is x empty? Because models/rendering.py returns an empty xyzs, and dirs is empty as well.
And why are they empty? Because in models/custom_functions.py, total_samples is zero, so the slices produce empty tensors.
And why does vren.raymarching_train return counter[0] == 0? Because in models/csrc/raymarching.cu there is no hit, so the sample count is 0.
My dataset is a single object with an empty background, so some rays hit nothing.
But why does this raise a RuntimeError? I think NeRF should be able to handle this situation.