I tried to run quantization-aware training (QAT) on multiple GPUs using `torch.nn.DataParallel`, but I got the following error:
Traceback (most recent call last):
File "scripts/qat.py", line 347, in <module>
args.eval_origin, args.eval_ptq
File "scripts/qat.py", line 245, in cmd_quantize
preprocess=preprocess, supervision_policy=supervision_policy())
File "/GSOL_lossless_AI/yolov7/quantization/quantize.py", line 347, in finetune
model(imgs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py", line 168, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/data_parallel.py", line 178, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply
output.reraise()
File "/usr/local/lib/python3.6/dist-packages/torch/_utils.py", line 434, in reraise
raise exception
RuntimeError: Caught RuntimeError in replica 1 on device 1.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
output = module(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/GSOL_lossless_AI/yolov7/models/yolo.py", line 599, in forward
return self.forward_once(x, profile) # single-scale inference, train
File "/GSOL_lossless_AI/yolov7/models/yolo.py", line 625, in forward_once
x = m(x) # run
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1120, in _call_impl
result = forward_call(*input, **kwargs)
File "/GSOL_lossless_AI/yolov7/models/common.py", line 111, in fuseforward
return self.act(self.conv(x))
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/nn/modules/quant_conv.py", line 120, in forward
quant_input, quant_weight = self._quant(input)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/nn/modules/quant_conv.py", line 85, in _quant
quant_input = self._input_quantizer(input)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/nn/modules/tensor_quantizer.py", line 346, in forward
outputs = self._quant_forward(inputs)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/nn/modules/tensor_quantizer.py", line 310, in _quant_forward
outputs = fake_tensor_quant(inputs, amax, self._num_bits, self._unsigned, self._narrow_range)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/tensor_quant.py", line 306, in forward
outputs, scale = _tensor_quant(inputs, amax, num_bits, unsigned, narrow_range)
File "/usr/local/lib/python3.6/dist-packages/pytorch_quantization/tensor_quant.py", line 354, in _tensor_quant
outputs = torch.clamp((inputs * scale).round_(), min_bound, max_bound)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0!
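I tried to do QAT with multiple GPUs with `torch.nn.DataParallel`, but I got an error. The failure is raised in replica 1 (device 1), inside `pytorch_quantization`'s `_tensor_quant`, where the `inputs * scale` computation involves tensors on both `cuda:1` and `cuda:0`.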