Make PyTorch models up to 40% faster! Thunder is a source-to-source compiler for PyTorch. It enables using different hardware executors at once, across one or thousands of GPUs.
Trying to run the HF model "microsoft/Phi-3-mini-4k-instruct" hits an issue translating an in-place op.
Traceback (most recent call last):
File "/home/tfogal/scratch/tfx-tests/phi3/phi3.py", line 70, in <module>
outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py", line 465, in _fn
return fn(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/tfogal/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3-mini-4k-instruct/0a67737cc96d2554230f90338b163bc6380a2a85/modeling_phi3.py", line 1243, in forward
outputs = self.model(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/tfogal/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3-mini-4k-instruct/0a67737cc96d2554230f90338b163bc6380a2a85/modeling_phi3.py", line 1091, in forward
attention_mask = _prepare_4d_causal_attention_mask(
File "/home/tfogal/env/lib/python3.10/site-packages/transformers/modeling_attn_mask_utils.py", line 295, in _prepare_4d_causal_attention_mask
def _prepare_4d_causal_attention_mask(
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py", line 632, in _fn
return fn(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/fx/graph_module.py", line 784, in call_wrapped
return self._wrapped_call(self, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/fx/graph_module.py", line 361, in __call__
raise e
File "/usr/local/lib/python3.10/dist-packages/torch/fx/graph_module.py", line 348, in __call__
return super(self.cls, obj).__call__(*args, **kwargs) # type: ignore[misc]
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "<eval_with_key>.6", line 5, in forward
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "/home/tfogal/scratch/thunder/thunder/core/module.py", line 80, in forward
res = self._forward_fn(*args, **kwargs)
File "/home/tfogal/scratch/thunder/thunder/__init__.py", line 724, in fn_
cache_entry, inps, pro_to_epi = get_computation_and_inputs(*args, **kwargs)
File "/home/tfogal/scratch/thunder/thunder/core/langctxs.py", line 136, in _fn
result = fn(*args, **kwargs)
File "/home/tfogal/scratch/thunder/thunder/__init__.py", line 219, in cache_info_wrapper
res = fn(*args, **kwargs)
File "/home/tfogal/scratch/thunder/thunder/__init__.py", line 527, in get_computation_and_inputs
orig_to_view_swap_map = check_inplace_to_views(computation_trc)
File "/home/tfogal/scratch/thunder/thunder/core/functionalization.py", line 62, in check_inplace_to_views
check(
File "/home/tfogal/scratch/thunder/thunder/core/baseutils.py", line 107, in check
raise exception_type(s())
NotImplementedError: in-place op of `torch.Tensor.masked_fill_` to `torch.Tensor.to` output `<TensorProxy(name="mask_1", dtype=thunder.dtypes.bfloat16, shape=(1, 1))>` is not supported. It's unclear if the output of ('torch.flatten', 'torch.reshape', 'Tensor.reshape_as', 'torch.Tensor.to', 'torch.Tensor.contiguous') is a copy, a view, or the input itself, as per https://pytorch.org/docs/stable/tensor_view.html
What is the trace when this happens?
We did identify this as unclear behaviour, but I'm wondering whether the `.to` comes from the user code or from a decomposition.
🚀 Model / language coverage
Trying to run the HF model "microsoft/Phi-3-mini-4k-instruct" hits an issue translating an in-place op.
The error comes from https://github.com/Lightning-AI/lightning-thunder/blob/fceb64efc93a80a27d38b8e84f0e2b5f132f3d2f/thunder/core/functionalization.py#L62-L71
Pitch
This issue blocks integration of the model.
Minimal Repro