[Inductor] [CPU] Crash failure in timm models (mkldnn._convolution_pointwise.binary)

🐛 Describe the bug

This failure was found in the latest TorchInductor CPU Performance Dashboard refresh test, with the error log below. The same crash applies to 3 models: sebotnet33ts_256, eca_halonext26ts, and eca_botnext26ts_256.

SW information

SW	Nightly commit	Master/Main commit
Pytorch	b23c765	9a1c6fd
Torchbench	/	2e5d723
torchaudio	c44b576	8ba323b
torchtext	ebcfed5	b3390fb
torchvision	d0f2888	5b4f79d
dynamo/benchmarks	db1da1f	5266953

Error logs

cpu  eval  eca_halonext26ts                   Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/eca_halonext26ts_c_256-06906299.pth" to /root/.cache/torch/hub/checkpoints/eca_halonext26ts_c_256-06906299.pth
WARNING:root:Failed to collect metadata on function, produced code may be suboptimal.  Known situations this can occur are inference mode only compilation involving resize_ or prims (!schema.hasAnyAliasInfo() INTERNAL ASSERT FAILED); if your situation looks different please file a bug to PyTorch.
Traceback (most recent call last):
  File "/workspace/pytorch/torch/_subclasses/fake_tensor.py", line 856, in __torch_dispatch__
    r = func(*args, **kwargs)
  File "/workspace/pytorch/torch/_ops.py", line 285, in __call__
    return self._op(*args, **kwargs or {})
  File "/workspace/pytorch/torch/_ops.py", line 378, in _get_dispatch
    final_key = resolve_key(self, key)
  File "/workspace/pytorch/torch/_ops.py", line 107, in resolve_key
    raise NotImplementedError(f"could not find kernel for {op} at dispatch key {k}")
NotImplementedError: could not find kernel for mkldnn._convolution_pointwise.binary at dispatch key DispatchKey.Meta

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 1163, in aot_wrapper_dedupe
    fw_metadata, _out, _num_aliasing_metadata_outs = run_functionalized_fw_and_collect_metadata(
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 272, in inner
    outs = f(*f_args)
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 2065, in functional_call
    out = Interpreter(mod).run(*args[params_len:], **kwargs)
  File "/workspace/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/workspace/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/workspace/pytorch/torch/fx/interpreter.py", line 288, in call_module
    return submod(*args, **kwargs)
  File "/workspace/pytorch/torch/nn/modules/module.py", line 1480, in _call_impl
    return forward_call(*args, **kwargs)
  File "/workspace/pytorch/torch/_inductor/overrides.py", line 241, in forward
    return self._conv_forward(input, other, self.weight, self.bias)
  File "/workspace/pytorch/torch/_inductor/overrides.py", line 224, in _conv_forward
    return torch.ops.mkldnn._convolution_pointwise(
  File "/workspace/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/workspace/pytorch/torch/_inductor/overrides.py", line 36, in __torch_function__
    return func(*args, **kwargs)
  File "/workspace/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/workspace/pytorch/torch/_subclasses/fake_tensor.py", line 861, in __torch_dispatch__
    return run_fallback_kernel(self, func, args, kwargs, not_implemented_error)
  File "/workspace/pytorch/torch/_subclasses/fake_tensor.py", line 1006, in run_fallback_kernel
    r = func(*args, **kwargs)
  File "/workspace/pytorch/torch/_ops.py", line 285, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Unary Fusion behavior undefined.

While executing %mod_stages_0_0_conv3_1x1_conv : [#users=2] = call_module[target=mod_stages_0_0_conv3_1x1_conv](args = (%mul, %mod_stages_0_0_shortcut_conv), kwargs = {})
Original traceback:
Module stack: {'mod': <class 'timm.models.byobnet.ByobNet'>, 'mod_stages': <class 'torch.nn.modules.container.Sequential'>, 'mod_stages_0': <class 'torch.nn.modules.container.Sequential'>, 'mod_stages_0_0': <class 'timm.models.byobnet.BottleneckBlock'>, 'mod_stages_0_0_conv3_1x1': <class 'timm.models.layers.conv_bn_act.ConvNormAct'>, 'mod_stages_0_0_conv3_1x1_conv': <class 'torch.nn.modules.conv.Conv2d'>}
  File "/opt/conda/lib/python3.8/site-packages/timm/models/layers/conv_bn_act.py", line 35, in forward
    x = self.conv(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1046, in forward
    x = self.conv3_1x1(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1551, in forward_features
    x = self.stages(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1559, in forward
    x = self.forward_features(x)
 |   File "benchmarks/dynamo/timm_models.py", line 302, in forward_pass
    return mod(*inputs)

ERROR:common:Failed for dynamo compile_fx raised RuntimeError: Unary Fusion behavior undefined.

While executing %mod_stages_0_0_conv3_1x1_conv : [#users=2] = call_module[target=mod_stages_0_0_conv3_1x1_conv](args = (%mul, %mod_stages_0_0_shortcut_conv), kwargs = {})
Original traceback:
Module stack: {'mod': <class 'timm.models.byobnet.ByobNet'>, 'mod_stages': <class 'torch.nn.modules.container.Sequential'>, 'mod_stages_0': <class 'torch.nn.modules.container.Sequential'>, 'mod_stages_0_0': <class 'timm.models.byobnet.BottleneckBlock'>, 'mod_stages_0_0_conv3_1x1': <class 'timm.models.layers.conv_bn_act.ConvNormAct'>, 'mod_stages_0_0_conv3_1x1_conv': <class 'torch.nn.modules.conv.Conv2d'>}
  File "/opt/conda/lib/python3.8/site-packages/timm/models/layers/conv_bn_act.py", line 35, in forward
    x = self.conv(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1046, in forward
    x = self.conv3_1x1(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1551, in forward_features
    x = self.stages(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1559, in forward
    x = self.forward_features(x)
 |   File "benchmarks/dynamo/timm_models.py", line 302, in forward_pass
    return mod(*inputs)

Set torch._dynamo.config.verbose=True for more information

You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True
Traceback (most recent call last):
  File "/workspace/pytorch/torch/_subclasses/fake_tensor.py", line 856, in __torch_dispatch__
    r = func(*args, **kwargs)
  File "/workspace/pytorch/torch/_ops.py", line 285, in __call__
    return self._op(*args, **kwargs or {})
NotImplementedError: Could not run 'mkldnn::_convolution_pointwise.binary' with arguments from the 'Meta' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'mkldnn::_convolution_pointwise.binary' is only available for these backends: [CPU, MkldnnCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PythonDispatcher].

CPU: registered at /workspace/pytorch/aten/src/ATen/native/mkldnn/Conv.cpp:716 [kernel]
MkldnnCPU: registered at /workspace/pytorch/aten/src/ATen/native/mkldnn/Conv.cpp:728 [kernel]
BackendSelect: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at /workspace/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:144 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at /workspace/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:499 [backend fallback]
Functionalize: registered at /workspace/pytorch/aten/src/ATen/FunctionalizeFallbackKernel.cpp:291 [backend fallback]
Named: registered at /workspace/pytorch/aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at /workspace/pytorch/aten/src/ATen/ConjugateFallback.cpp:18 [backend fallback]
Negative: registered at /workspace/pytorch/aten/src/ATen/native/NegateFallback.cpp:19 [backend fallback]
ZeroTensor: registered at /workspace/pytorch/aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:64 [backend fallback]
AutogradOther: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:35 [backend fallback]
AutogradCPU: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:39 [backend fallback]
AutogradCUDA: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:47 [backend fallback]
AutogradXLA: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:51 [backend fallback]
AutogradMPS: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:59 [backend fallback]
AutogradXPU: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:43 [backend fallback]
AutogradHPU: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:68 [backend fallback]
AutogradLazy: fallthrough registered at /workspace/pytorch/aten/src/ATen/core/VariableFallbackKernel.cpp:55 [backend fallback]
Tracer: registered at /workspace/pytorch/torch/csrc/autograd/TraceTypeManual.cpp:296 [backend fallback]
AutocastCPU: fallthrough registered at /workspace/pytorch/aten/src/ATen/autocast_mode.cpp:486 [backend fallback]
AutocastCUDA: fallthrough registered at /workspace/pytorch/aten/src/ATen/autocast_mode.cpp:354 [backend fallback]
FuncTorchBatched: registered at /workspace/pytorch/aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:801 [backend fallback]
FuncTorchVmapMode: fallthrough registered at /workspace/pytorch/aten/src/ATen/functorch/VmapModeRegistrations.cpp:28 [backend fallback]
Batched: registered at /workspace/pytorch/aten/src/ATen/BatchingRegistrations.cpp:1064 [backend fallback]
VmapMode: fallthrough registered at /workspace/pytorch/aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at /workspace/pytorch/aten/src/ATen/functorch/TensorWrapper.cpp:189 [backend fallback]
PythonTLSSnapshot: registered at /workspace/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:152 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at /workspace/pytorch/aten/src/ATen/functorch/DynamicLayer.cpp:495 [backend fallback]
PythonDispatcher: registered at /workspace/pytorch/aten/src/ATen/core/PythonFallbackKernel.cpp:148 [backend fallback]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/workspace/pytorch/torch/_dynamo/output_graph.py", line 546, in call_user_compiler
    compiled_fn = compiler_fn(gm, self.example_inputs())
  File "/workspace/pytorch/torch/_dynamo/debug_utils.py", line 911, in debug_wrapper
    compiled_gm = compiler_fn(gm, example_inputs, **kwargs)
  File "/workspace/pytorch/torch/_inductor/compile_fx.py", line 398, in compile_fx
    return aot_autograd(
  File "/workspace/pytorch/torch/_dynamo/optimizations/training.py", line 80, in compiler_fn
    cg = aot_module_simplified(gm, example_inputs, **kwargs)
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 2093, in aot_module_simplified
    compiled_fn = create_aot_dispatcher_function(
  File "/workspace/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 1792, in create_aot_dispatcher_function
    compiled_fn = compiler_fn(flat_fn, fake_flat_tensor_args, aot_config)
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 1268, in aot_wrapper_dedupe
    compiled_fn = compiler_fn(wrapped_flat_fn, deduped_flat_args, aot_config)
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 867, in aot_dispatch_base
    fw_module = make_fx(flat_fn, aot_config.decompositions)(*flat_args)
  File "/workspace/pytorch/torch/fx/experimental/proxy_tensor.py", line 683, in wrapped
    t = dispatch_trace(wrap_key(func, args, fx_tracer), tracer=fx_tracer, concrete_args=tuple(phs))
  File "/workspace/pytorch/torch/_dynamo/eval_frame.py", line 209, in _fn
    return fn(*args, **kwargs)
  File "/workspace/pytorch/torch/fx/experimental/proxy_tensor.py", line 441, in dispatch_trace
    graph = tracer.trace(root, concrete_args)
  File "/workspace/pytorch/torch/_dynamo/eval_frame.py", line 209, in _fn
    return fn(*args, **kwargs)
  File "/workspace/pytorch/torch/fx/_symbolic_trace.py", line 756, in trace
    (self.create_arg(fn(*args)),),
  File "/workspace/pytorch/torch/fx/experimental/proxy_tensor.py", line 457, in wrapped
    out = f(*tensors)
  File "<string>", line 1, in <lambda>
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 1266, in wrapped_flat_fn
    return flat_fn(*add_dupe_args(args))
  File "/workspace/pytorch/functorch/_src/aot_autograd.py", line 2065, in functional_call
    out = Interpreter(mod).run(*args[params_len:], **kwargs)
  File "/workspace/pytorch/torch/fx/interpreter.py", line 130, in run
    self.env[node] = self.run_node(node)
  File "/workspace/pytorch/torch/fx/interpreter.py", line 171, in run_node
    return getattr(self, n.op)(n.target, args, kwargs)
  File "/workspace/pytorch/torch/fx/interpreter.py", line 288, in call_module
    return submod(*args, **kwargs)
  File "/workspace/pytorch/torch/fx/_symbolic_trace.py", line 734, in module_call_wrapper
    return self.call_module(mod, forward, args, kwargs)
  File "/workspace/pytorch/torch/fx/experimental/proxy_tensor.py", line 407, in call_module
    return forward(*args, **kwargs)
  File "/workspace/pytorch/torch/fx/_symbolic_trace.py", line 727, in forward
    return _orig_module_call(mod, *args, **kwargs)
  File "/workspace/pytorch/torch/nn/modules/module.py", line 1480, in _call_impl
    return forward_call(*args, **kwargs)
  File "/workspace/pytorch/torch/_inductor/overrides.py", line 241, in forward
    return self._conv_forward(input, other, self.weight, self.bias)
  File "/workspace/pytorch/torch/_inductor/overrides.py", line 224, in _conv_forward
    return torch.ops.mkldnn._convolution_pointwise(
  File "/workspace/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/workspace/pytorch/torch/_inductor/overrides.py", line 36, in __torch_function__
    return func(*args, **kwargs)
  File "/workspace/pytorch/torch/_ops.py", line 500, in __call__
    return self._op(*args, **kwargs or {})
  File "/workspace/pytorch/torch/fx/experimental/proxy_tensor.py", line 483, in __torch_dispatch__
    return self.inner_torch_dispatch(func, types, args, kwargs)
  File "/workspace/pytorch/torch/fx/experimental/proxy_tensor.py", line 508, in inner_torch_dispatch
    out = proxy_call(self, func, args, kwargs)
  File "/workspace/pytorch/torch/fx/experimental/proxy_tensor.py", line 345, in proxy_call
    out = func(*args, **kwargs)
  File "/workspace/pytorch/torch/_ops.py", line 285, in __call__
    return self._op(*args, **kwargs or {})
  File "/workspace/pytorch/torch/_subclasses/fake_tensor.py", line 861, in __torch_dispatch__
    return run_fallback_kernel(self, func, args, kwargs, not_implemented_error)
  File "/workspace/pytorch/torch/_subclasses/fake_tensor.py", line 1006, in run_fallback_kernel
    r = func(*args, **kwargs)
  File "/workspace/pytorch/torch/_ops.py", line 285, in __call__
    return self._op(*args, **kwargs or {})
RuntimeError: Unary Fusion behavior undefined.

While executing %mod_stages_0_0_conv3_1x1_conv : [#users=2] = call_module[target=mod_stages_0_0_conv3_1x1_conv](args = (%mul, %mod_stages_0_0_shortcut_conv), kwargs = {})
Original traceback:
Module stack: {'mod': <class 'timm.models.byobnet.ByobNet'>, 'mod_stages': <class 'torch.nn.modules.container.Sequential'>, 'mod_stages_0': <class 'torch.nn.modules.container.Sequential'>, 'mod_stages_0_0': <class 'timm.models.byobnet.BottleneckBlock'>, 'mod_stages_0_0_conv3_1x1': <class 'timm.models.layers.conv_bn_act.ConvNormAct'>, 'mod_stages_0_0_conv3_1x1_conv': <class 'torch.nn.modules.conv.Conv2d'>}
  File "/opt/conda/lib/python3.8/site-packages/timm/models/layers/conv_bn_act.py", line 35, in forward
    x = self.conv(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1046, in forward
    x = self.conv3_1x1(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1551, in forward_features
    x = self.stages(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1559, in forward
    x = self.forward_features(x)
 |   File "benchmarks/dynamo/timm_models.py", line 302, in forward_pass
    return mod(*inputs)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/workspace/pytorch/benchmarks/dynamo/common.py", line 1189, in warmup
    fn(model, example_inputs)
  File "/workspace/pytorch/torch/_dynamo/eval_frame.py", line 209, in _fn
    return fn(*args, **kwargs)
  File "/workspace/pytorch/torch/_dynamo/eval_frame.py", line 329, in catch_errors
    return callback(frame, cache_size)
  File "/workspace/pytorch/torch/_dynamo/convert_frame.py", line 468, in _convert_frame
    result = inner_convert(frame, cache_size)
  File "/workspace/pytorch/torch/_dynamo/convert_frame.py", line 102, in _fn
    return fn(*args, **kwargs)
  File "/workspace/pytorch/torch/_dynamo/utils.py", line 90, in time_wrapper
    r = func(*args, **kwargs)
  File "/workspace/pytorch/torch/_dynamo/convert_frame.py", line 339, in _convert_frame_assert
    return _compile(
  File "/workspace/pytorch/torch/_dynamo/convert_frame.py", line 395, in _compile
    out_code = transform_code_object(code, transform)
  File "/workspace/pytorch/torch/_dynamo/bytecode_transformation.py", line 341, in transform_code_object
    transformations(instructions, code_options)
  File "/workspace/pytorch/torch/_dynamo/convert_frame.py", line 382, in transform
    tracer.run()
  File "/workspace/pytorch/torch/_dynamo/symbolic_convert.py", line 1625, in run
    super().run()
  File "/workspace/pytorch/torch/_dynamo/symbolic_convert.py", line 484, in run
    and self.step()
  File "/workspace/pytorch/torch/_dynamo/symbolic_convert.py", line 454, in step
    getattr(self, inst.opname)(inst)
  File "/workspace/pytorch/torch/_dynamo/symbolic_convert.py", line 1687, in RETURN_VALUE
    self.output.compile_subgraph(self)
  File "/workspace/pytorch/torch/_dynamo/output_graph.py", line 440, in compile_subgraph
    self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
  File "/workspace/pytorch/torch/_dynamo/output_graph.py", line 511, in compile_and_call_fx_graph
    compiled_fn = self.call_user_compiler(gm)
  File "/workspace/pytorch/torch/_dynamo/output_graph.py", line 551, in call_user_compiler
    raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: compile_fx raised RuntimeError: Unary Fusion behavior undefined.

While executing %mod_stages_0_0_conv3_1x1_conv : [#users=2] = call_module[target=mod_stages_0_0_conv3_1x1_conv](args = (%mul, %mod_stages_0_0_shortcut_conv), kwargs = {})
Original traceback:
Module stack: {'mod': <class 'timm.models.byobnet.ByobNet'>, 'mod_stages': <class 'torch.nn.modules.container.Sequential'>, 'mod_stages_0': <class 'torch.nn.modules.container.Sequential'>, 'mod_stages_0_0': <class 'timm.models.byobnet.BottleneckBlock'>, 'mod_stages_0_0_conv3_1x1': <class 'timm.models.layers.conv_bn_act.ConvNormAct'>, 'mod_stages_0_0_conv3_1x1_conv': <class 'torch.nn.modules.conv.Conv2d'>}
  File "/opt/conda/lib/python3.8/site-packages/timm/models/layers/conv_bn_act.py", line 35, in forward
    x = self.conv(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1046, in forward
    x = self.conv3_1x1(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1551, in forward_features
    x = self.stages(x)
 |   File "/opt/conda/lib/python3.8/site-packages/timm/models/byobnet.py", line 1559, in forward
    x = self.forward_features(x)
 |   File "benchmarks/dynamo/timm_models.py", line 302, in forward_pass
    return mod(*inputs)

Set torch._dynamo.config.verbose=True for more information

You can suppress this exception and fall back to eager by setting:
    torch._dynamo.config.suppress_errors = True

Minified repro

from math import inf
import torch
from torch import tensor, device
import torch.fx as fx
import torch._dynamo
from torch._dynamo.testing import rand_strided
from torch._dynamo.debug_utils import run_fwd_maybe_bwd
from torch._dynamo.debug_utils import same_two_models

# REPLACEABLE COMMENT FOR TESTING PURPOSES

# Input specifications for the repro. Each entry is a tuple of
# (shape, stride, dtype, device, requires_grad), as unpacked on the next line.
# There is one 4-D float32 CPU tensor followed by four 1-D tensors of length 64.
# NOTE(review): presumably these are passed positionally to Repro.forward in
# this order (input, running_mean, running_var, weight, bias) -- confirm
# against run_fwd_maybe_bwd.
args = [((1, 64, 64, 64), (262144, 4096, 64, 1), torch.float32, 'cpu', True), ((64,), (1,), torch.float32, 'cpu', False), ((64,), (1,), torch.float32, 'cpu', False), ((64,), (1,), torch.float32, 'cpu', True), ((64,), (1,), torch.float32, 'cpu', True)]
# Materialize each specification as a tensor with the requested shape, stride,
# dtype and device, then set its requires_grad flag in place.
args = [rand_strided(sh, st, dt, dev).requires_grad_(rg) for (sh, st, dt, dev, rg) in args]

from torch.nn import *
class Repro(torch.nn.Module):
    """Minified reproduction module.

    The forward pass chains: functional batch norm (called with
    training=False) -> Identity -> in-place SiLU -> grouped 3x3 Conv2d.

    NOTE(review): the attribute names suggest these layers were extracted
    from a timm model's stage 0, block 1 -- confirm against the original
    graph. The code is kept exactly as generated so that it traces to the
    same graph.
    """

    def __init__(self):
        super().__init__()
        # Pass-through layer sitting between the batch norm and the activation.
        self.self_mod_stages_0_1_conv1_1x1_bn_drop = Identity()
        # SiLU activation that overwrites its input tensor (inplace=True).
        self.self_mod_stages_0_1_conv1_1x1_bn_act = SiLU(inplace=True)
        # 64 -> 64 channel 3x3 convolution, stride 1, padding 1, 4 groups, no bias.
        self.self_mod_stages_0_1_conv2_kxk_conv = Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=4, bias=False)

    def forward(self, self_mod_stages_0_1_conv1_1x1_conv, self_mod_stages_0_1_conv1_1x1_bn_running_mean, self_mod_stages_0_1_conv1_1x1_bn_running_var, self_mod_stages_0_1_conv1_1x1_bn_weight, self_mod_stages_0_1_conv1_1x1_bn_bias):
        """Run batch norm -> Identity -> SiLU -> conv and return the result.

        Args:
            self_mod_stages_0_1_conv1_1x1_conv: Tensor fed to batch norm as
                its input.
            self_mod_stages_0_1_conv1_1x1_bn_running_mean: Running mean
                passed to batch norm.
            self_mod_stages_0_1_conv1_1x1_bn_running_var: Running variance
                passed to batch norm.
            self_mod_stages_0_1_conv1_1x1_bn_weight: Batch norm weight.
            self_mod_stages_0_1_conv1_1x1_bn_bias: Batch norm bias.

        Returns:
            A 1-tuple containing the output of the grouped convolution.
        """
        # Batch norm with training=False, momentum=0.1, eps=1e-05. The trailing
        # chained assignment rebinds every argument name to None once it has
        # been consumed, dropping this frame's references to those tensors.
        batch_norm_7 = torch.nn.functional.batch_norm(self_mod_stages_0_1_conv1_1x1_conv, self_mod_stages_0_1_conv1_1x1_bn_running_mean, self_mod_stages_0_1_conv1_1x1_bn_running_var, self_mod_stages_0_1_conv1_1x1_bn_weight, self_mod_stages_0_1_conv1_1x1_bn_bias, False, 0.1, 1e-05);  self_mod_stages_0_1_conv1_1x1_conv = self_mod_stages_0_1_conv1_1x1_bn_running_mean = self_mod_stages_0_1_conv1_1x1_bn_running_var = self_mod_stages_0_1_conv1_1x1_bn_weight = self_mod_stages_0_1_conv1_1x1_bn_bias = None
        # Identity layer; the intermediate name is released afterwards.
        self_mod_stages_0_1_conv1_1x1_bn_drop = self.self_mod_stages_0_1_conv1_1x1_bn_drop(batch_norm_7);  batch_norm_7 = None
        # In-place SiLU applied to the Identity layer's output.
        self_mod_stages_0_1_conv1_1x1_bn_act = self.self_mod_stages_0_1_conv1_1x1_bn_act(self_mod_stages_0_1_conv1_1x1_bn_drop);  self_mod_stages_0_1_conv1_1x1_bn_drop = None
        # Grouped 3x3 convolution applied to the activated tensor.
        self_mod_stages_0_1_conv2_kxk_conv = self.self_mod_stages_0_1_conv2_kxk_conv(self_mod_stages_0_1_conv1_1x1_bn_act);  self_mod_stages_0_1_conv1_1x1_bn_act = None
        return (self_mod_stages_0_1_conv2_kxk_conv,)

# Eager reference module.
mod = Repro()
# The same module instance wrapped by TorchDynamo with the "inductor" backend.
opt_mod = torch._dynamo.optimize("inductor")(mod)

# Run both variants with CUDA autocast explicitly disabled.
# NOTE(review): presumably the second call (the compiled module) is where the
# BackendCompilerFailed error from the logs is expected to surface -- confirm.
# The two results are not compared here.
with torch.cuda.amp.autocast(enabled=False):
    ref = run_fwd_maybe_bwd(mod, args)
    res = run_fwd_maybe_bwd(opt_mod, args)

pytorch / torchdynamo

[Inductor] [CPU] Crash failure in timm models (mkldnn._convolution_pointwise.binary) #1975

🐛 Describe the bug

Error logs

Minified repro