Megvii-BaseDetection / BEVDepth

Official code for BEVDepth.
MIT License
710 stars 98 forks source link

Depth aggregation autocast #163

Open gaguai opened 1 year ago

gaguai commented 1 year ago

I am having an issue with the depth aggregation module.

Traceback (most recent call last): File "/mnt/BEVDepth/bevdepth/exps/nuscenes/mv/bev_depth_lss_r50_256x704_128x128_20e_cbgs_2key_da_ema.py", line 32, in extra_trainer_config_args={'epochs': 20}) File "/mnt/BEVDepth/bevdepth/exps/base_cli.py", line 78, in run_cli trainer.fit(model) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 772, in fit self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 724, in _call_and_handle_interrupt return trainer_fn(*args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 812, in _fit_impl results = self._run(model, ckpt_path=self.ckpt_path) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1237, in _run results = self._run_stage() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1324, in _run_stage return self._run_train() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1354, in _run_train self.fit_loop.run() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 204, in run self.advance(*args, *kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/fit_loop.py", line 269, in advance self._outputs = self.epoch_loop.run(self._data_fetcher) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 204, in run self.advance(args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 208, in advance batch_output = self.batch_loop.run(batch, batch_idx) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 204, in run self.advance(*args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 88, in 
advance outputs = self.optimizer_loop.run(split_batch, optimizers, batch_idx) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 204, in run self.advance(*args, *kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 207, in advance self.optimizer_idx, File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 256, in _run_optimization self._optimizer_step(optimizer, opt_idx, batch_idx, closure) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 378, in _optimizer_step using_lbfgs=is_lbfgs, File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1596, in _call_lightning_module_hook output = fn(args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py", line 1625, in optimizer_step optimizer.step(closure=optimizer_closure) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 168, in step step_output = self._strategy.optimizer_step(self._optimizer, self._optimizer_idx, closure, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/strategies/ddp.py", line 278, in optimizer_step optimizer_output = super().optimizer_step(optimizer, opt_idx, closure, model, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/strategies/strategy.py", line 193, in optimizer_step return self.precision_plugin.optimizer_step(model, optimizer, opt_idx, closure, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/plugins/precision/native_amp.py", line 85, in optimizer_step closure_result = closure() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 148, in call self._result = self.closure(*args, kwargs) File 
"/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 134, in closure step_output = self._step_fn() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 427, in _training_step training_step_output = self.trainer._call_strategy_hook("training_step", step_kwargs.values()) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1766, in _call_strategy_hook output = fn(args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/strategies/ddp.py", line 344, in training_step return self.model(*args, kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 799, in forward output = self.module(*inputs[0], *kwargs[0]) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/overrides/base.py", line 82, in forward output = self.module.training_step(*inputs, kwargs) File "/mnt/BEVDepth/bevdepth/exps/nuscenes/base_exp.py", line 249, in training_step preds, depth_preds = self(sweep_imgs, mats) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/mnt/BEVDepth/bevdepth/exps/nuscenes/base_exp.py", line 239, in forward return self.model(sweep_imgs, mats) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/mnt/BEVDepth/bevdepth/models/base_bev_depth.py", line 59, in forward is_return_depth=True) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File 
"/mnt/BEVDepth/bevdepth/layers/backbones/base_lss_fpn.py", line 594, in forward is_return_depth=is_return_depth) File "/mnt/BEVDepth/bevdepth/layers/backbones/base_lss_fpn.py", line 530, in _forward_single_sweep img_feat_with_depth = self._forward_voxel_net(img_feat_with_depth) File "/mnt/BEVDepth/bevdepth/layers/backbones/base_lss_fpn.py", line 400, in _forward_voxel_net self.depth_aggregation_net(img_feat_with_depth).view( File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/cuda/amp/autocast_mode.py", line 141, in decorate_autocast return func(args, kwargs) File "/mnt/BEVDepth/bevdepth/layers/backbones/base_lss_fpn.py", line 309, in forward x = self.reduce_conv(x) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/container.py", line 139, in forward input = module(input) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, **kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 443, in forward return self._conv_forward(input, self.weight, self.bias) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 440, in _conv_forward self.padding, self.dilation, self.groups) RuntimeError: Input type (torch.cuda.HalfTensor) and weight type (torch.cuda.FloatTensor) should be the same

zengjichen commented 1 year ago

Maybe you should turn off the `@autocast(False)` decorator in base_lss_fpn.py, line 309.

YuhanZhou521 commented 6 months ago

I am having an issue with the depth aggregation module.

Traceback (most recent call last): File "/mnt/BEVDepth/bevdepth/exps/nuscenes/mv/bev_depth_lss_r50_256x704_128x128_20e_cbgs_2key_da_ema.py", line 32, in extra_trainer_config_args={'epochs': 20}) File "/mnt/BEVDepth/bevdepth/exps/base_cli.py", line 78, in run_cli trainer.fit(model) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 772, in fit self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 724, in _call_and_handle_interrupt return trainer_fn(*args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 812, in _fit_impl results = self._run(model, ckpt_path=self.ckpt_path) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1237, in _run results = self._run_stage() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1324, in _run_stage return self._run_train() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1354, in _run_train self.fit_loop.run() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 204, in run self.advance(*args, *kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/fit_loop.py", line 269, in advance self._outputs = self.epoch_loop.run(self._data_fetcher) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 204, in run self.advance(args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 208, in advance batch_output = self.batch_loop.run(batch, batch_idx) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 204, in run self.advance(*args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 88, in 
advance outputs = self.optimizer_loop.run(split_batch, optimizers, batch_idx) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 204, in run self.advance(*args, *kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 207, in advance self.optimizer_idx, File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 256, in _run_optimization self._optimizer_step(optimizer, opt_idx, batch_idx, closure) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 378, in _optimizer_step using_lbfgs=is_lbfgs, File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1596, in _call_lightning_module_hook output = fn(args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py", line 1625, in optimizer_step optimizer.step(closure=optimizer_closure) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 168, in step step_output = self._strategy.optimizer_step(self._optimizer, self._optimizer_idx, closure, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/strategies/ddp.py", line 278, in optimizer_step optimizer_output = super().optimizer_step(optimizer, opt_idx, closure, model, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/strategies/strategy.py", line 193, in optimizer_step return self.precision_plugin.optimizer_step(model, optimizer, opt_idx, closure, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/plugins/precision/native_amp.py", line 85, in optimizer_step closure_result = closure() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 148, in call self._result = self.closure(*args, kwargs) File 
"/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 134, in closure step_output = self._step_fn() File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/loops/optimization/optimizer_loop.py", line 427, in _training_step training_step_output = self.trainer._call_strategy_hook("training_step", step_kwargs.values()) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1766, in _call_strategy_hook output = fn(args, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/strategies/ddp.py", line 344, in training_step return self.model(*args, kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 799, in forward output = self.module(*inputs[0], *kwargs[0]) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/opt/conda/lib/python3.7/site-packages/pytorch_lightning/overrides/base.py", line 82, in forward output = self.module.training_step(*inputs, kwargs) File "/mnt/BEVDepth/bevdepth/exps/nuscenes/base_exp.py", line 249, in training_step preds, depth_preds = self(sweep_imgs, mats) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/mnt/BEVDepth/bevdepth/exps/nuscenes/base_exp.py", line 239, in forward return self.model(sweep_imgs, mats) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, kwargs) File "/mnt/BEVDepth/bevdepth/models/base_bev_depth.py", line 59, in forward is_return_depth=True) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, kwargs) File 
"/mnt/BEVDepth/bevdepth/layers/backbones/base_lss_fpn.py", line 594, in forward is_return_depth=is_return_depth) File "/mnt/BEVDepth/bevdepth/layers/backbones/base_lss_fpn.py", line 530, in _forward_single_sweep img_feat_with_depth = self._forward_voxel_net(img_feat_with_depth) File "/mnt/BEVDepth/bevdepth/layers/backbones/base_lss_fpn.py", line 400, in _forward_voxel_net self.depth_aggregation_net(img_feat_with_depth).view( File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/cuda/amp/autocast_mode.py", line 141, in decorate_autocast return func(args, kwargs) File "/mnt/BEVDepth/bevdepth/layers/backbones/base_lss_fpn.py", line 309, in forward x = self.reduce_conv(x) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(*input, *kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/container.py", line 139, in forward input = module(input) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl return forward_call(input, **kwargs) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 443, in forward return self._conv_forward(input, self.weight, self.bias) File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 440, in _conv_forward self.padding, self.dilation, self.groups) RuntimeError: Input type (torch.cuda.HalfTensor) and weight type (torch.cuda.FloatTensor) should be the same

May I ask if you have resolved this error?