Traceback (most recent call last):
File "xla/benchmarks/experiment_runner.py", line 906, in <module>
main()
File "xla/benchmarks/experiment_runner.py", line 902, in main
runner.run()
File "xla/benchmarks/experiment_runner.py", line 59, in run
self.run_single_config()
File "xla/benchmarks/experiment_runner.py", line 247, in run_single_config
metrics, last_output = self.run_once_and_gather_metrics(
File "xla/benchmarks/experiment_runner.py", line 324, in run_once_and_gather_metrics
output, _ = loop(iter_fn=self._default_iter_fn)
File "xla/benchmarks/experiment_runner.py", line 293, in loop
output, timing, trace = iter_fn(benchmark_experiment, benchmark_model,
File "xla/benchmarks/experiment_runner.py", line 209, in _default_iter_fn
output = benchmark_model.model_iter_fn(
File "torch/_dynamo/eval_frame.py", line 417, in _fn
return fn(*args, **kwargs)
File "xla/benchmarks/benchmark_model.py", line 154, in eval
def eval(self, inputs, collect_full_output=False):
File "torch/_dynamo/eval_frame.py", line 417, in _fn
return fn(*args, **kwargs)
File "torch/_dynamo/external_utils.py", line 25, in inner
return fn(*args, **kwargs)
File "torch/_functorch/aot_autograd.py", line 903, in forward
return compiled_fn(full_args)
File "torch/_functorch/_aot_autograd/utils.py", line 81, in g
return f(*args)
File "torch/_functorch/_aot_autograd/runtime_wrappers.py", line 101, in runtime_wrapper
all_outs = call_func_at_runtime_with_args(
File "torch/_functorch/_aot_autograd/utils.py", line 105, in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
File "torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py", line 118, in rng_functionalization_wrapper
return compiled_fw(args)
File "torch/_inductor/codecache.py", line 863, in __call__
return self.get_current_callable()(inputs)
File "torch/_inductor/compile_fx.py", line 612, in run
return model(new_inputs)
File "torch/_inductor/codecache.py", line 891, in _run_from_cache
return compiled_graph.compiled_artifact(inputs)
File "/tmp/torchinductor/hs/chskmhbw2wqwcaspxmne4wp6haqn7nxxc2ltoc5apjcf4kspo6py.py", line 6057, in call
buf320 = extern_kernels.convolution(buf318, buf319, stride=(1, 1), padding=(1, 1), dilation=(1, 1), transposed=False, output_padding=(0, 0), groups=1, bias=None)
RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED
🐛 Bug
Environment
@miladm @JackCaoG