import torch
import centml.compiler
def bar(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """Scale ``a`` into ``a / (|a| + 1)`` and multiply it by a sign-adjusted ``b``.

    Args:
        a: Tensor that is divided element-wise by ``torch.abs(a) + 1``.
        b: Tensor multiplied with the scaled ``a``; it is negated first when
            the sum of its elements is negative.

    Returns:
        The element-wise product of the scaled ``a`` and the (possibly
        negated) ``b``.
    """
    x = a / (torch.abs(a) + 1)
    # The branch depends on the runtime values in ``b``, not only on its shape.
    if b.sum() < 0:
        b = b * -1
    return x * b
# Compile `bar` using the 'centml' backend.
compiled_bar = torch.compile(bar, backend='centml')
# Call the compiled function twice with inputs of different lengths (2, then 4).
# NOTE(review): presumably the change in input size on the second call is what
# produces the dynamically shaped graph described in this report — confirm.
compiled_bar(torch.randn(2), torch.randn(2))
compiled_bar(torch.randn(4), torch.randn(4))
The issue stems from calling numel() on tensor(s) with symbolic sizes/strides, which are present in dynamically shaped graphs.
Full stack trace:
Exception in thread Thread-6 (remote_compilation):
Traceback (most recent call last):
File "/home/ubuntu/centml-python-client/centml/compiler/backend.py", line 75, in _serialize_model_and_inputs
Exception in thread Thread-7 (prediction):
torch.save(self.inputs, self.serialized_input_path, pickle_protocol=config_instance.PICKLE_PROTOCOL)
File "/home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/serialization.py", line 628, in save
Traceback (most recent call last):
File "/home/ubuntu/centml-python-client/centml/compiler/backend.py", line 75, in _serialize_model_and_inputs
_save(obj, opened_zipfile, pickle_module, pickle_protocol, _disable_byteorder_record)
File "/home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/serialization.py", line 840, in _save
torch.save(self.inputs, self.serialized_input_path, pickle_protocol=config_instance.PICKLE_PROTOCOL)
File "/home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/serialization.py", line 628, in save
pickler.dump(obj)
File "/home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/_tensor.py", line 219, in __reduce_ex__
_save(obj, opened_zipfile, pickle_module, pickle_protocol, _disable_byteorder_record)
File "/home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/serialization.py", line 840, in _save
func, args = self._reduce_ex_internal(proto)
File "/home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/_tensor.py", line 381, in _reduce_ex_internal
pickler.dump(obj)
Runner
File "/home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/_tensor.py", line 219, in __reduce_ex__
self.data_ptr() == 0
func, args = self._reduce_ex_internal(proto)
File "/home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/_tensor.py", line 381, in _reduce_ex_internal
RuntimeError: Cannot call numel() on tensor with symbolic sizes/strides
Exception raised from throw_cannot_call_with_symbolic at ../c10/core/TensorImpl.cpp:298 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x57 (0x7fd100ecf897 in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #1: c10::TensorImpl::throw_cannot_call_with_symbolic(char const*) const + 0x9c (0x7fd100e7e1ec in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #2: <unknown function> + 0x60f7f (0x7fd100eaaf7f in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #3: <unknown function> + 0x51684a (0x7fd0ff51684a in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libtorch_python.so)
<omitting python frames>
frame #44: <unknown function> + 0x94ac3 (0x7fd101c94ac3 in /lib/x86_64-linux-gnu/libc.so.6)
frame #45: <unknown function> + 0x126850 (0x7fd101d26850 in /lib/x86_64-linux-gnu/libc.so.6)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
self.data_ptr() == 0
RuntimeError: Cannot call numel() on tensor with symbolic sizes/strides
Exception raised from throw_cannot_call_with_symbolic at ../c10/core/TensorImpl.cpp:298 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x57 (0x7fd100ecf897 in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #1: c10::TensorImpl::throw_cannot_call_with_symbolic(char const*) const + 0x9c (0x7fd100e7e1ec in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #2: <unknown function> + 0x60f7f (0x7fd100eaaf7f in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #3: <unknown function> + 0x51684a (0x7fd0ff51684a in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libtorch_python.so)
<omitting python frames>
frame #44: <unknown function> + 0x94ac3 (0x7fd101c94ac3 in /lib/x86_64-linux-gnu/libc.so.6)
frame #45: <unknown function> + 0x126850 (0x7fd101d26850 in /lib/x86_64-linux-gnu/libc.so.6)
self.run()
File "/usr/lib/python3.10/threading.py", line 953, in run
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
self._target(*self._args, **self._kwargs)
File "/home/ubuntu/centml-python-client/centml/compiler/backend.py", line 159, in remote_compilation
self.run()
File "/usr/lib/python3.10/threading.py", line 953, in run
self._serialize_model_and_inputs()
File "/home/ubuntu/centml-python-client/centml/compiler/backend.py", line 77, in _serialize_model_and_inputs
self._target(*self._args, **self._kwargs)
File "/home/ubuntu/centml-python-client/centml/compiler/backend.py", line 186, in prediction
raise Exception(f"Failed to save module or inputs with torch.save: {e}") from e
Exception: Failed to save module or inputs with torch.save: Cannot call numel() on tensor with symbolic sizes/strides
Exception raised from throw_cannot_call_with_symbolic at ../c10/core/TensorImpl.cpp:298 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x57 (0x7fd100ecf897 in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #1: c10::TensorImpl::throw_cannot_call_with_symbolic(char const*) const + 0x9c (0x7fd100e7e1ec in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #2: <unknown function> + 0x60f7f (0x7fd100eaaf7f in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #3: <unknown function> + 0x51684a (0x7fd0ff51684a in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libtorch_python.so)
<omitting python frames>
frame #44: <unknown function> + 0x94ac3 (0x7fd101c94ac3 in /lib/x86_64-linux-gnu/libc.so.6)
frame #45: <unknown function> + 0x126850 (0x7fd101d26850 in /lib/x86_64-linux-gnu/libc.so.6)
self._serialize_model_and_inputs()
File "/home/ubuntu/centml-python-client/centml/compiler/backend.py", line 77, in _serialize_model_and_inputs
raise Exception(f"Failed to save module or inputs with torch.save: {e}") from e
Exception: Failed to save module or inputs with torch.save: Cannot call numel() on tensor with symbolic sizes/strides
Exception raised from throw_cannot_call_with_symbolic at ../c10/core/TensorImpl.cpp:298 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x57 (0x7fd100ecf897 in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #1: c10::TensorImpl::throw_cannot_call_with_symbolic(char const*) const + 0x9c (0x7fd100e7e1ec in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #2: <unknown function> + 0x60f7f (0x7fd100eaaf7f in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libc10.so)
frame #3: <unknown function> + 0x51684a (0x7fd0ff51684a in /home/ubuntu/centml-python-client/venv/lib/python3.10/site-packages/torch/lib/libtorch_python.so)
<omitting python frames>
frame #44: <unknown function> + 0x94ac3 (0x7fd101c94ac3 in /lib/x86_64-linux-gnu/libc.so.6)
frame #45: <unknown function> + 0x126850 (0x7fd101d26850 in /lib/x86_64-linux-gnu/libc.so.6)
The issue actually stems from saving the inputs, not the graph. The line of interest is `torch.save(self.inputs, self.serialized_input_path, pickle_protocol=config_instance.PICKLE_PROTOCOL)`.
The following code errors out on the client side:
The issue stems from calling `numel()` on tensor(s) with symbolic sizes/strides, which are present in dynamically shaped graphs.
Full stack trace:
┆Issue is synchronized with this Notion page by Unito