'poptorch_cpp_error': Unsupported ops found in compiled model: [aten::nan_to_num]. Not all operations are supported yet by Graphcore's PyTorch compiler. If you believe any of these should be, please report this message to support@graphcore.ai. #19
`Traceback (most recent call last):
File "train.py", line 166, in
trainer.fit(model, train_loader)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 532, in fit
call._call_and_handle_interrupt(
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 43, in _call_and_handle_interrupt
return trainer_fn(*args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 571, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1023, in _run_stage
self.fit_loop.run()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 202, in run
self.advance()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 355, in advance
self.epoch_loop.run(self._data_fetcher)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 133, in run
self.advance(data_fetcher)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 219, in advance
batch_output = self.automatic_optimization.run(trainer.optimizers[0], kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 181, in run
closure()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 142, in call
self._result = self.closure(*args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 128, in closure
step_output = self._step_fn()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 315, in _training_step
training_step_output = call._call_strategy_hook(trainer, "training_step", kwargs.values())
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 294, in _call_strategy_hook
output = fn(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/ipu.py", line 269, in training_step
return self._step(RunningStage.TRAINING, *args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/ipu.py", line 265, in _step
return poptorch_model(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 1238, in call
self._compile(in_tensors)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_impl.py", line 358, in wrapper
return func(self, *args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 975, in _compile
self._executable = self._compileWithDispatch(in_tensors_trace_view)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_impl.py", line 164, in wrapper
return func(args, **kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 936, in _compileWithDispatch
executable = poptorch_core.compileWithManualTracing(
poptorch.poptorch_core.Error: In poptorch/source/ErrorOnUnsupportedAten.cpp:30: 'poptorch_cpp_error': Unsupported ops found in compiled model: [aten::nan_to_num]. Not all operations are supported yet by Graphcore's PyTorch compiler. If you believe any of these should be, please report this message to support@graphcore.ai.
Error raised in:
[0] compileWithManualTracing
[08:55:41.247] [poptorch::python] [critical] poptorch.poptorch_core.Error: In poptorch/source/ErrorOnUnsupportedAten.cpp:30: 'poptorch_cpp_error': Unsupported ops found in compiled model: [aten::nan_to_num]. Not all operations are supported yet by Graphcore's PyTorch compiler. If you believe any of these should be, please report this message to support@graphcore.ai.
Error raised in:
[0] compileWithManualTracing
Traceback (most recent call last):
File "train.py", line 166, in
trainer.fit(model, train_loader)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 532, in fit
call._call_and_handle_interrupt(
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 43, in _call_and_handle_interrupt
return trainer_fn(*args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 571, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1023, in _run_stage
self.fit_loop.run()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 202, in run
self.advance()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 355, in advance
self.epoch_loop.run(self._data_fetcher)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 133, in run
self.advance(data_fetcher)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 219, in advance
batch_output = self.automatic_optimization.run(trainer.optimizers[0], kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 181, in run
closure()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 142, in call
self._result = self.closure(*args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 128, in closure
step_output = self._step_fn()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 315, in _training_step
training_step_output = call._call_strategy_hook(trainer, "training_step", kwargs.values())
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 294, in _call_strategy_hook
output = fn(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/ipu.py", line 269, in training_step
return self._step(RunningStage.TRAINING, *args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/ipu.py", line 265, in _step
return poptorch_model(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 1238, in call
self._compile(in_tensors)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_impl.py", line 358, in wrapper
return func(self, *args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 975, in _compile
self._executable = self._compileWithDispatch(in_tensors_trace_view)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_impl.py", line 164, in wrapper
return func(args, **kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 936, in _compileWithDispatch
executable = poptorch_core.compileWithManualTracing(
poptorch.poptorch_core.Error: In poptorch/source/ErrorOnUnsupportedAten.cpp:30: 'poptorch_cpp_error': Unsupported ops found in compiled model: [aten::nan_to_num]. Not all operations are supported yet by Graphcore's PyTorch compiler. If you believe any of these should be, please report this message to support@graphcore.ai.
Error raised in:
[0] compileWithManualTracing`
I'm trying out IPUs to train a model. However, training cannot get past even a single step because of this error:
Environment (Paperspace):
pytorch 2.0.1, poptorch 3.3, pytorch-lightning 2.0.9
`Traceback (most recent call last): File "train.py", line 166, in
trainer.fit(model, train_loader)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 532, in fit
call._call_and_handle_interrupt(
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 43, in _call_and_handle_interrupt
return trainer_fn(*args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 571, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1023, in _run_stage
self.fit_loop.run()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 202, in run
self.advance()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 355, in advance
self.epoch_loop.run(self._data_fetcher)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 133, in run
self.advance(data_fetcher)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 219, in advance
batch_output = self.automatic_optimization.run(trainer.optimizers[0], kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 181, in run
closure()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 142, in call
self._result = self.closure(*args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 128, in closure
step_output = self._step_fn()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 315, in _training_step
training_step_output = call._call_strategy_hook(trainer, "training_step", kwargs.values())
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 294, in _call_strategy_hook
output = fn(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/ipu.py", line 269, in training_step
return self._step(RunningStage.TRAINING, *args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/ipu.py", line 265, in _step
return poptorch_model(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 1238, in call
self._compile(in_tensors)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_impl.py", line 358, in wrapper
return func(self, *args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 975, in _compile
self._executable = self._compileWithDispatch(in_tensors_trace_view)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_impl.py", line 164, in wrapper
return func(args, **kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 936, in _compileWithDispatch
executable = poptorch_core.compileWithManualTracing(
poptorch.poptorch_core.Error: In poptorch/source/ErrorOnUnsupportedAten.cpp:30: 'poptorch_cpp_error': Unsupported ops found in compiled model: [aten::nan_to_num]. Not all operations are supported yet by Graphcore's PyTorch compiler. If you believe any of these should be, please report this message to support@graphcore.ai.
Error raised in:
[0] compileWithManualTracing
[08:55:41.247] [poptorch::python] [critical] poptorch.poptorch_core.Error: In poptorch/source/ErrorOnUnsupportedAten.cpp:30: 'poptorch_cpp_error': Unsupported ops found in compiled model: [aten::nan_to_num]. Not all operations are supported yet by Graphcore's PyTorch compiler. If you believe any of these should be, please report this message to support@graphcore.ai. Error raised in: [0] compileWithManualTracing
Traceback (most recent call last): File "train.py", line 166, in
trainer.fit(model, train_loader)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 532, in fit
call._call_and_handle_interrupt(
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 43, in _call_and_handle_interrupt
return trainer_fn(*args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 571, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 980, in _run
results = self._run_stage()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1023, in _run_stage
self.fit_loop.run()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 202, in run
self.advance()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 355, in advance
self.epoch_loop.run(self._data_fetcher)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 133, in run
self.advance(data_fetcher)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/training_epoch_loop.py", line 219, in advance
batch_output = self.automatic_optimization.run(trainer.optimizers[0], kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 181, in run
closure()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 142, in call
self._result = self.closure(*args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 128, in closure
step_output = self._step_fn()
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/loops/optimization/automatic.py", line 315, in _training_step
training_step_output = call._call_strategy_hook(trainer, "training_step", kwargs.values())
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 294, in _call_strategy_hook
output = fn(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/ipu.py", line 269, in training_step
return self._step(RunningStage.TRAINING, *args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/pytorch_lightning/strategies/ipu.py", line 265, in _step
return poptorch_model(args, kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 1238, in call
self._compile(in_tensors)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_impl.py", line 358, in wrapper
return func(self, *args, *kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 975, in _compile
self._executable = self._compileWithDispatch(in_tensors_trace_view)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_impl.py", line 164, in wrapper
return func(args, **kwargs)
File "/opt/pytorch/lib/python3.8/site-packages/poptorch/_poplar_executor.py", line 936, in _compileWithDispatch
executable = poptorch_core.compileWithManualTracing(
poptorch.poptorch_core.Error: In poptorch/source/ErrorOnUnsupportedAten.cpp:30: 'poptorch_cpp_error': Unsupported ops found in compiled model: [aten::nan_to_num]. Not all operations are supported yet by Graphcore's PyTorch compiler. If you believe any of these should be, please report this message to support@graphcore.ai.
Error raised in:
[0] compileWithManualTracing`