This also fails with v2.1.
I was able to reduce the original benchmark to a simple model with 4 layers:
import torch
import torch.nn as nn

class TestModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, groups=1, bias=False, dilation=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False)
        self.norm = nn.GroupNorm(32, 128)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.norm(x)
        x = self.relu(x)
        return x

example_inputs = torch.randn(4, 64, 4, 4)
...
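For context, a single-training-step comparison along the following lines can surface the eager-vs-inductor divergence described next. This is my own sketch, not the author's benchmark: the sum() loss, the gradient comparison, and the use of torch.compile with its default inductor backend are all assumptions.

import copy

import torch
import intel_extension_for_pytorch  # registers the 'xpu' device

def one_step(model, x):
    # One forward/backward pass; return the parameter gradients on the CPU.
    model(x).sum().backward()
    return [p.grad.detach().cpu() for p in model.parameters()]

eager_model = TestModel().to('xpu')
compiled_model = torch.compile(copy.deepcopy(eager_model))  # inductor backend by default

x = torch.randn(4, 64, 4, 4, device='xpu')
eager_grads = one_step(eager_model, x)
inductor_grads = one_step(compiled_model, x)

for i, (g_e, g_i) in enumerate(zip(eager_grads, inductor_grads)):
    print(f"param {i}: max abs grad diff = {(g_e - g_i).abs().max().item()}")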
For this model, training results differ on XPU between eager and inductor modes (XPU eager matches CPU eager). Going through the code generated by TorchInductor, I found that the difference appears in the backward convolution, which is a torch.ops.aten call, so Triton is not involved. With this knowledge, I was able to write a simple test showing that this operation behaves differently on CPU and XPU devices (all tensor sizes and convolution params match the reproducer above):
import torch
import intel_extension_for_pytorch  # registers the 'xpu' device
from torch._dynamo.testing import rand_strided, same

torch.manual_seed(1337)

# grad_output, input (channels-last), and weight of the backward convolution;
# sizes and strides match conv2 from the model above.
arg1 = rand_strided((4, 128, 1, 1), (128, 1, 1, 1), device='cpu', dtype=torch.float32)
arg2 = rand_strided((4, 64, 2, 2), (256, 1, 128, 64), device='cpu', dtype=torch.float32)
arg3 = rand_strided((128, 64, 1, 1), (64, 1, 1, 1), device='cpu', dtype=torch.float32)

def run_conv_bwd(arg1, arg2, arg3, device):
    arg1_dev = arg1.to(device)
    arg2_dev = arg2.to(device)
    arg3_dev = arg3.to(device)
    # convolution_backward(grad_output, input, weight, bias_sizes, stride,
    # padding, dilation, transposed, output_padding, groups, output_mask)
    res = torch.ops.aten.convolution_backward(arg1_dev, arg2_dev, arg3_dev, [0], [2, 2], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False])
    # Move results back to the CPU so they can be compared directly.
    res = tuple(v.to('cpu') if v is not None else v for v in res)
    return res

cpu_res = run_conv_bwd(arg1, arg2, arg3, 'cpu')
xpu_res = run_conv_bwd(arg1, arg2, arg3, 'xpu')
print(f"CPU result:\n{cpu_res}")
print(f"XPU result:\n{xpu_res}")
assert same(cpu_res, xpu_res)
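To see where the outputs diverge rather than eyeballing the printed tensors, a small helper (my addition, not part of the original report) can print the per-output maximum absolute difference:

# Print the largest elementwise deviation for each gradient returned by
# convolution_backward; masked-off outputs come back as None.
for i, (c, x) in enumerate(zip(cpu_res, xpu_res)):
    if c is not None and x is not None:
        print(f"output {i}: max abs diff = {(c - x).abs().max().item()}")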
The issue is still reproducible.
Env:
9a8ab778d34bd24c5caceb340837483decc4c311
fe93a00ffe438e9ba8c8392c0b051b1662c810de
d54ca9f80ead108c8797441681e219becaf963d8
1980f8af5bcd0bb2ce51965cf79d8d4c25dad8a0
10239873229e527f8b7e7b3340c40ee38bb1cfc4