import timeit
import numpy as np
import timm
import torch
import torch._dynamo as dynamo
# Build the timm "resnext101_32x8d" model with pretrained weights and
# num_classes=2.
model = timm.create_model(
    "resnext101_32x8d",
    pretrained=True,
    num_classes=2,
)
# Place the model on the "cuda:0" device.
model = model.to(device="cuda:0")

# Backend names handed to benchmark(), one run per entry.
cuda_backends = [
    "inductor",
    "onnxrt",
]
def benchmark(backend="inductor"):
    """Compile the module-level ``model`` with ``backend`` and time its forward pass.

    One warm-up call is made first so that compilation cost is excluded from
    the measurement, then 25 single forward passes are timed. The mean
    latency is printed and returned.

    Args:
        backend: Name of the ``torch.compile`` backend to use.

    Returns:
        The mean latency, in seconds, over the timed forward passes.

    Raises:
        Any exception raised while compiling or running the model with the
        requested backend propagates to the caller (for example
        ``torch._dynamo.exc.BackendCompilerFailed``). Dynamo state is reset
        before the exception leaves this function.
    """
    # Create the input directly on the GPU rather than on the CPU followed by
    # a host-to-device copy.
    dummy_inputs = torch.randn(64, 3, 7, 7, device="cuda:0")
    opt_model = torch.compile(model, backend=backend)

    def timed_forward():
        # CUDA work is launched asynchronously; wait for it to finish so the
        # timer covers the actual execution, not just the kernel launches.
        opt_model(dummy_inputs)
        torch.cuda.synchronize()

    try:
        # Warm-up: the first call triggers compilation for this backend.
        timed_forward()
        runtimes = timeit.repeat(timed_forward, number=1, repeat=25)
    finally:
        # Always clear Dynamo's state so a failing backend does not affect
        # the next benchmark run.
        torch._dynamo.reset()

    mean_latency = float(np.mean(runtimes))
    print(f"Average latency (seconds): {mean_latency} for {backend}.")
    return mean_latency
# Run the benchmark once per backend. benchmark() prints its own report, so
# its return value is not printed again here (doing so emitted a stray
# "None" line after each result).
for backend in cuda_backends:
    benchmark(backend)
The script above fails when the backend is set to "onnxrt".
My onnxruntime version is 1.14.1 and my onnx version is 1.13.0.
Error logs
Average latency (seconds): 0.03335898948002068 for inductor.
None
/usr/local/lib/python3.8/dist-packages/torch/jit/_check.py:172: UserWarning: The TorchScript type system doesn't support instance-level annotations on empty non-base types in `__init__`. Instead, either 1) use a type annotation in the class body, or 2) wrap the type in `torch.jit.Attribute`.
warnings.warn("The TorchScript type system doesn't support "
============= Diagnostic Run torch.onnx.export version 2.0.0+cu117 =============
verbose: False, log level: Level.ERROR
======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/output_graph.py", line 670, in call_user_compiler
compiled_fn = compiler_fn(gm, self.fake_example_inputs())
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/debug_utils.py", line 1055, in debug_wrapper
compiled_gm = compiler_fn(gm, example_inputs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/backends/common.py", line 107, in wrapper
return fn(model, inputs, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/backends/onnxrt.py", line 51, in onnxrt
return onnxrt(gm, example_inputs, filename=tmp.name)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/backends/common.py", line 107, in wrapper
return fn(model, inputs, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/backends/onnxrt.py", line 76, in onnxrt
assert provider in onnxruntime.get_available_providers()
AssertionError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "benchmark_backends.py", line 26, in <module>
print(benchmark(backend))
File "benchmark_backends.py", line 18, in benchmark
_ = opt_model(dummy_inputs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/eval_frame.py", line 82, in forward
return self.dynamo_ctx(self._orig_mod.forward)(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/eval_frame.py", line 209, in _fn
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/eval_frame.py", line 337, in catch_errors
return callback(frame, cache_size, hooks)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/convert_frame.py", line 404, in _convert_frame
result = inner_convert(frame, cache_size, hooks)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/convert_frame.py", line 104, in _fn
return fn(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/convert_frame.py", line 262, in _convert_frame_assert
return _compile(
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/utils.py", line 163, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/convert_frame.py", line 324, in _compile
out_code = transform_code_object(code, transform)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/bytecode_transformation.py", line 445, in transform_code_object
transformations(instructions, code_options)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/convert_frame.py", line 311, in transform
tracer.run()
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/symbolic_convert.py", line 1726, in run
super().run()
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/symbolic_convert.py", line 576, in run
and self.step()
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/symbolic_convert.py", line 540, in step
getattr(self, inst.opname)(inst)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/symbolic_convert.py", line 1792, in RETURN_VALUE
self.output.compile_subgraph(
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/output_graph.py", line 517, in compile_subgraph
self.compile_and_call_fx_graph(tx, list(reversed(stack_values)), root)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/output_graph.py", line 588, in compile_and_call_fx_graph
compiled_fn = self.call_user_compiler(gm)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/utils.py", line 163, in time_wrapper
r = func(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/_dynamo/output_graph.py", line 675, in call_user_compiler
raise BackendCompilerFailed(self.compiler_fn, e) from e
torch._dynamo.exc.BackendCompilerFailed: onnxrt raised AssertionError:
Set torch._dynamo.config.verbose=True for more information
You can suppress this exception and fall back to eager by setting:
torch._dynamo.config.suppress_errors = True
Minified repro
No response
Versions
Collecting environment information...
PyTorch version: 2.0.0+cu117
Is debug build: False
CUDA used to build PyTorch: 11.7
ROCM used to build PyTorch: N/A
OS: Ubuntu 20.04.5 LTS (x86_64)
GCC version: (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
Clang version: Could not collect
CMake version: version 3.24.1
Libc version: glibc-2.31
Python version: 3.8.10 (default, Nov 14 2022, 12:59:47) [GCC 9.4.0] (64-bit runtime)
Python platform: Linux-4.19.0-23-cloud-amd64-x86_64-with-glibc2.29
Is CUDA available: True
CUDA runtime version: 12.0.140
CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: GPU 0: NVIDIA A100-SXM4-40GB
Nvidia driver version: 510.47.03
cuDNN version: Probably one of the following:
/usr/lib/x86_64-linux-gnu/libcudnn.so.8.7.0
/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.7.0
/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.7.0
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.7.0
/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.7.0
/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.7.0
/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.7.0
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True
CPU:
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
Address sizes: 46 bits physical, 48 bits virtual
CPU(s): 12
On-line CPU(s) list: 0-11
Thread(s) per core: 2
Core(s) per socket: 6
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 85
Model name: Intel(R) Xeon(R) CPU @ 2.20GHz
Stepping: 7
CPU MHz: 2200.200
BogoMIPS: 4400.40
Hypervisor vendor: KVM
Virtualization type: full
L1d cache: 192 KiB
L1i cache: 192 KiB
L2 cache: 6 MiB
L3 cache: 38.5 MiB
NUMA node0 CPU(s): 0-11
Vulnerability Itlb multihit: Not affected
Vulnerability L1tf: Not affected
Vulnerability Mds: Mitigation; Clear CPU buffers; SMT Host state unknown
Vulnerability Meltdown: Not affected
Vulnerability Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
Vulnerability Retbleed: Mitigation; Enhanced IBRS
Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp
Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization
Vulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence
Vulnerability Srbds: Not affected
Vulnerability Tsx async abort: Mitigation; Clear CPU buffers; SMT Host state unknown
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves arat avx512_vnni md_clear arch_capabilities
Versions of relevant libraries:
[pip3] numpy==1.22.2
[pip3] pytorch-quantization==2.1.2
[pip3] torch==2.0.0
[pip3] torch-tensorrt==1.4.0.dev0
[pip3] torchtext==0.13.0a0+fae8e8c
[pip3] torchvision==0.15.1
[pip3] triton==2.0.0
[conda] Could not collect
cc @ezyang @soumith @msaroufim @wconstab @ngimel @bdhirsh
🐛 Describe the bug
The above fails when the `backend` is set to `"onnxrt"`.
My `onnxruntime` version is 1.14.1 and my `onnx` version is 1.13.0.
Error logs
Minified repro
No response
Versions
cc @ezyang @soumith @msaroufim @wconstab @ngimel @bdhirsh