When I run on a single GPU, everything works. When I use multiple GPUs, it fails with the following error:
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 114, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 124, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 65, in parallel_apply
raise output
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 41, in _worker
output = module(*input, **kwargs)
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/data3/hooks/retinanet/model.py", line 274, in forward
x3 = self.layer3(x2)
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/container.py", line 91, in forward
input = module(input)
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/data3/hooks/retinanet/non_local.py", line 94, in forward
output = self.operation_function(x)
File "/data3/hooks/retinanet/non_local.py", line 101, in _embedded_gaussian
g_x = self.g(x).view(batch_size, self.inter_channels, -1)
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in __call__
result = self.forward(*input, **kwargs)
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 301, in forward
self.padding, self.dilation, self.groups)
RuntimeError: Expected tensor for argument #1 'input' to have the same device as tensor for argument #2 'weight'; but device 1 does not equal 0 (while checking arguments for cudnn_convolution)
When I run on a single GPU, everything works. When I use multiple GPUs, it fails with the following error:
File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in call result = self.forward(*input, kwargs) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 114, in forward outputs = self.parallel_apply(replicas, inputs, kwargs) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 124, in parallel_apply return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 65, in parallel_apply raise output File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 41, in _worker output = module(*input, *kwargs) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in call result = self.forward(input, kwargs) File "/data3/hooks/retinanet/model.py", line 274, in forward x3 = self.layer3(x2) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in call result = self.forward(*input, kwargs) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/container.py", line 91, in forward input = module(input) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in call result = self.forward(*input, *kwargs) File "/data3/hooks/retinanet/non_local.py", line 94, in forward output = self.operation_function(x) File "/data3/hooks/retinanet/non_local.py", line 101, in _embedded_gaussian g_x = self.g(x).view(batch_size, self.inter_channels, -1) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/module.py", line 491, in call result = self.forward(input, 
kwargs) File "/data2/gjt/pyenv/versions/anaconda3-5.2.0/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 301, in forward self.padding, self.dilation, self.groups) RuntimeError: Expected tensor for argument #1 'input' to have the same device as tensor for argument #2 'weight'; but device 1 does not equal 0 (while checking arguments for cudnn_convolution)