Hi, I tried to run the MonoFlex code with pytorch==1.4.0, torchvision==0.5.0, and cudatoolkit=10.1,
but I got the following error log:
`[2021-07-26 09:43:56,528] monoflex.trainer INFO: Start training
Traceback (most recent call last):
File "tools/plain_train_net.py", line 160, in
args=(args,),
File "/home/yuhan/MonoFlex/engine/launch.py", line 54, in launch
daemon=False,
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 171, in spawn
while not spawn_context.join():
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 118, in join
raise Exception(msg)
Exception:
-- Process 1 terminated with the following error:
Traceback (most recent call last):
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 19, in _wrap
fn(i, args)
File "/home/yuhan/MonoFlex/engine/launch.py", line 89, in _distributed_worker
main_func(args)
File "/home/yuhan/MonoFlex/tools/plain_train_net.py", line 139, in main
train(cfg, model, device, distributed)
File "/home/yuhan/MonoFlex/tools/plain_train_net.py", line 83, in train
arguments,
File "/home/yuhan/MonoFlex/engine/trainer.py", line 109, in do_train
loss_dict, log_loss_dict = model(images, targets)
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(*input, kwargs)
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 447, in forward
output = self.module(*inputs[0], *kwargs[0])
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(input, kwargs)
File "/home/yuhan/MonoFlex/model/detector.py", line 34, in forward
loss_dict, log_loss_dict = self.heads(features, targets)
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(*input, kwargs)
File "/home/yuhan/MonoFlex/model/head/detector_head.py", line 18, in forward
x = self.predictor(features, targets)
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(*input, *kwargs)
File "/home/yuhan/MonoFlex/model/head/detector_predictor.py", line 125, in forward
feature_cls = self.class_head:-1
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(input, kwargs)
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/container.py", line 100, in forward
input = module(input)
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call
result = self.forward(*input, **kwargs)
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/inplace_abn/abn.py", line 247, in forward
self.activation_param,
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/inplace_abn/functions.py", line 249, in inplace_abn
None,
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/inplaceabn/functions.py", line 107, in forward
count = count.to(dtype=var.dtype)
RuntimeError: CUDA error: invalid device function`
Hi, I tried to run the MonoFlex code with pytorch==1.4.0, torchvision==0.5.0, and cudatoolkit=10.1, but I got the following error log:
`[2021-07-26 09:43:56,528] monoflex.trainer INFO: Start training Traceback (most recent call last): File "tools/plain_train_net.py", line 160, in
args=(args,),
File "/home/yuhan/MonoFlex/engine/launch.py", line 54, in launch
daemon=False,
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 171, in spawn
while not spawn_context.join():
File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 118, in join
raise Exception(msg)
Exception:
-- Process 1 terminated with the following error: Traceback (most recent call last): File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/multiprocessing/spawn.py", line 19, in _wrap fn(i, args) File "/home/yuhan/MonoFlex/engine/launch.py", line 89, in _distributed_worker main_func(args) File "/home/yuhan/MonoFlex/tools/plain_train_net.py", line 139, in main train(cfg, model, device, distributed) File "/home/yuhan/MonoFlex/tools/plain_train_net.py", line 83, in train arguments, File "/home/yuhan/MonoFlex/engine/trainer.py", line 109, in do_train loss_dict, log_loss_dict = model(images, targets) File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call result = self.forward(*input, kwargs) File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 447, in forward output = self.module(*inputs[0], *kwargs[0]) File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call result = self.forward(input, kwargs) File "/home/yuhan/MonoFlex/model/detector.py", line 34, in forward loss_dict, log_loss_dict = self.heads(features, targets) File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call result = self.forward(*input, kwargs) File "/home/yuhan/MonoFlex/model/head/detector_head.py", line 18, in forward x = self.predictor(features, targets) File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call result = self.forward(*input, *kwargs) File "/home/yuhan/MonoFlex/model/head/detector_predictor.py", line 125, in forward feature_cls = self.class_head:-1 File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call result = self.forward(input, kwargs) File 
"/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/container.py", line 100, in forward input = module(input) File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in call result = self.forward(*input, **kwargs) File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/inplace_abn/abn.py", line 247, in forward self.activation_param, File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/inplace_abn/functions.py", line 249, in inplace_abn None, File "/home/yuhan/anaconda3/envs/monoflex/lib/python3.7/site-packages/inplaceabn/functions.py", line 107, in forward count = count.to(dtype=var.dtype) RuntimeError: CUDA error: invalid device function`
Do you have any idea how to fix this? Thank you!