Hi, I can start training, but it fails at a certain epoch:
==> Start Training Epoch 5, lr=0.009738 ...
loss=0.005879 (0.006519) lr=0.009618: : 75% 158/210 [00:03<00:01, 41.08it/s]/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/autograd/__init__.py:200: UserWarning: Error detected in MmBackward0. Traceback of forward call that caused the error:
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/main.py", line 252, in <module>
trainer.train(train_loader, valid_loader, max_epoch)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 932, in train
self.train_one_epoch(train_loader)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 1165, in train_one_epoch
preds, truths, loss_net = self.train_step(data)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 676, in train_step
outputs = self.model.render(rays_o, rays_d, index=index, staged=False, bg_color=bg_color, perturb=True, cam_near_far=cam_near_far, shading=shading, **vars(self.opt))
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/renderer.py", line 722, in render
sigmas, rgbs, speculars = self(xyzs, dirs, ind_code, shading)
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/network.py", line 87, in forward
color, specular = self.rgb(x, d, c, shading)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/network.py", line 182, in rgb
specular = self.specular_net(torch.cat([d, geo_feat[..., 3:]], dim=-1))
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/network.py", line 48, in forward
x = self.net[l](x)
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
(Triggered internally at ../torch/csrc/autograd/python_anomaly_mode.cpp:114.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/main.py", line 252, in <module>
trainer.train(train_loader, valid_loader, max_epoch)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 932, in train
self.train_one_epoch(train_loader)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 1172, in train_one_epoch
self.scaler.scale(loss).backward()
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/_tensor.py", line 487, in backward
torch.autograd.backward(
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/autograd/__init__.py", line 200, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: Function 'MmBackward0' returned nan values in its 0th output.
Hi, I can start training, but it fails at a certain epoch:
==> Start Training Epoch 5, lr=0.009738 ... loss=0.005879 (0.006519) lr=0.009618: : 75% 158/210 [00:03<00:01, 41.08it/s]/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/autograd/__init__.py:200: UserWarning: Error detected in MmBackward0. Traceback of forward call that caused the error: File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/main.py", line 252, in <module>
trainer.train(train_loader, valid_loader, max_epoch)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 932, in train
self.train_one_epoch(train_loader)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 1165, in train_one_epoch
preds, truths, loss_net = self.train_step(data)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 676, in train_step
outputs = self.model.render(rays_o, rays_d, index=index, staged=False, bg_color=bg_color, perturb=True, cam_near_far=cam_near_far, shading=shading, **vars(self.opt))
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/renderer.py", line 722, in render
sigmas, rgbs, speculars = self(xyzs, dirs, ind_code, shading)
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/network.py", line 87, in forward
color, specular = self.rgb(x, d, c, shading)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/network.py", line 182, in rgb
specular = self.specular_net(torch.cat([d, geo_feat[..., 3:]], dim=-1))
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/network.py", line 48, in forward
x = self.net[l](x)
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
(Triggered internally at ../torch/csrc/autograd/python_anomaly_mode.cpp:114.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
Traceback (most recent call last):
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/main.py", line 252, in <module>
trainer.train(train_loader, valid_loader, max_epoch)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 932, in train
self.train_one_epoch(train_loader)
File "/media/ren3/disk1_ssd/ruijie/nerf2mesh-main/nerf/utils.py", line 1172, in train_one_epoch
self.scaler.scale(loss).backward()
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/_tensor.py", line 487, in backward
torch.autograd.backward(
File "/home/ren3/miniconda3/envs/nerf2mesh/lib/python3.9/site-packages/torch/autograd/__init__.py", line 200, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: Function 'MmBackward0' returned nan values in its 0th output.