# Calibrate the model: collect activation statistics over 32 training
# batches, then compute each quantizer's amax with the "max" method.
with torch.no_grad():
    collect_stats(q_model, dataloader_train, num_batches=32)
    compute_amax(q_model, method="max")
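
For reference, collect_stats and compute_amax are helper functions defined in the notebook (adapted from NVIDIA's pytorch-quantization samples), not part of the library API. A minimal sketch of what they do, assuming the model's quantizers are pytorch_quantization.nn.TensorQuantizer modules; this is reconstructed from the toolkit samples and may differ from the notebook's exact helpers:

import torch
from pytorch_quantization import calib
from pytorch_quantization import nn as quant_nn

def collect_stats(model, data_loader, num_batches):
    # Put every quantizer into calibration mode: record activation
    # statistics instead of fake-quantizing
    for module in model.modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.disable_quant()
                module.enable_calib()
            else:
                module.disable()
    # Run a fixed number of batches through the model to gather statistics
    for i, (image, _) in enumerate(data_loader):
        model(image.cuda())
        if i >= num_batches:
            break
    # Restore fake-quantization mode
    for module in model.modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.enable_quant()
                module.disable_calib()
            else:
                module.enable()

def compute_amax(model, **kwargs):
    # Load the calibrated dynamic range (amax) into every quantizer
    for module in model.modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                if isinstance(module._calibrator, calib.MaxCalibrator):
                    module.load_calib_amax()
                else:
                    module.load_calib_amax(**kwargs)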
# Load the TorchScript QAT model and compile it into a TensorRT module
qat_model = torch.jit.load("model_qat.jit.pt").eval()

compile_spec = {
    "inputs": [torch_tensorrt.Input([64, 3, 224, 224])],
    "enabled_precisions": {torch.int8},  # set of allowed kernel precisions
    "truncate_long_and_double": True,
}
trt_mod = torch_tensorrt.compile(qat_model, **compile_spec)
This code raises a RuntimeError:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[14], line 7
2 qat_model = torch.jit.load("model_qat.jit.pt").eval()
3 compile_spec = {"inputs": [torch_tensorrt.Input([64, 3, 224, 224])],
4 "enabled_precisions": torch.int8,
5 "truncate_long_and_double": True
6 }
----> 7 trt_mod = torch_tensorrt.compile(qat_model, **compile_spec,)
File /usr/local/lib/python3.8/dist-packages/torch_tensorrt/_compile.py:125, in compile(module, ir, inputs, enabled_precisions, **kwargs)
120 logging.log(
121 logging.Level.Info,
122 "Module was provided as a torch.nn.Module, trying to script the module with torch.jit.script. In the event of a failure please preconvert your module to TorchScript",
123 )
124 ts_mod = torch.jit.script(module)
--> 125 return torch_tensorrt.ts.compile(
126 ts_mod, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs
127 )
128 elif target_ir == _IRType.fx:
129 if (
130 torch.float16 in enabled_precisions
131 or torch_tensorrt.dtype.half in enabled_precisions
132 ):
File /usr/local/lib/python3.8/dist-packages/torch_tensorrt/ts/_compiler.py:136, in compile(module, inputs, input_signature, device, disable_tf32, sparse_weights, enabled_precisions, refit, debug, capability, num_avg_timing_iters, workspace_size, dla_sram_size, dla_local_dram_size, dla_global_dram_size, calibrator, truncate_long_and_double, require_full_compilation, min_block_size, torch_executed_ops, torch_executed_modules)
110 raise ValueError(
111 f"require_full_compilation is enabled however the list of modules and ops to run in torch is not empty. Found: torch_executed_ops: {torch_executed_ops}, torch_executed_modules: {torch_executed_modules}"
112 )
114 spec = {
115 "inputs": inputs,
116 "input_signature": input_signature,
(...)
133 },
134 }
--> 136 compiled_cpp_mod = _C.compile_graph(module._c, _parse_compile_spec(spec))
137 compiled_module = torch.jit._recursive.wrap_cpp_module(compiled_cpp_mod)
138 return compiled_module
RuntimeError: [Error thrown at core/conversion/conversionctx/ConversionCtx.cpp:169] Building serialized network failed in TensorRT
Environment

Build information about Torch-TensorRT can be found by turning on debug messages.

I'm running this inside the NVIDIA container nvcr.io/nvidia/pytorch:23.04-py3.
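
To actually capture that build information, the failing compile call can be wrapped in a debug-logging context. A minimal sketch, assuming the torch_tensorrt.logging context managers available in this release:

import torch_tensorrt

# Raise the log level so TensorRT reports details of why engine building failed
with torch_tensorrt.logging.debug():
    trt_mod = torch_tensorrt.compile(qat_model, **compile_spec)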
Bug Description

I'm trying to apply the quantization-aware training (QAT) procedure to a ViT model, using this example notebook: https://github.com/pytorch/TensorRT/blob/main/notebooks/qat-ptq-workflow.ipynb. Compilation fails with the RuntimeError shown above.
To Reproduce

I follow the example notebook exactly through section 4, Quantization Aware Training. For context, the step that produces model_qat.jit.pt is the TorchScript export at the end of QAT; a condensed sketch follows below (reconstructed from the documented pytorch-quantization export workflow; variable names are assumptions).
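
import torch
from pytorch_quantization import nn as quant_nn

# Make TensorQuantizer emit ATen fake-quantize ops so the traced graph
# carries the Q/DQ nodes that TensorRT needs for explicit INT8 precision
quant_nn.TensorQuantizer.use_fb_fake_quant = True

with torch.no_grad():
    dummy_input = torch.randn(64, 3, 224, 224).cuda()
    jit_model = torch.jit.trace(q_model.eval(), dummy_input)
    torch.jit.save(jit_model, "model_qat.jit.pt")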