Hi, I'm trying to run quantization-aware training with per-channel quantization. However, setting per_tensor to False gives me this error:
File ".../tinynn/converter/operators/torch/quantized.py", line 138, in parse
self.parse_common(graph_converter)
File ".../tinynn/converter/operators/torch/quantized.py", line 76, in parse_common
weight_tensor = self.create_attr_tensor(weight)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File ".../tinynn/converter/operators/torch/base.py", line 252, in create_attr_tensor
tensor = tfl.Tensor(
^^^^^^^^^^^
File ".../tinynn/converter/operators/tflite/base.py", line 223, in __init__
assert tensor.q_zero_point() == asym_s8_offset, (
^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Expected quantizer->qscheme() == kPerTensorAffine to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
This should reproduce the issue:
import torch.nn as nn
import torch
from tinynn.converter import TFLiteConverter
from tinynn.graph.quantization.quantizer import QATQuantizer
from tinynn.graph.tracer import model_tracer
class DummyModel(nn.Module):
    """Minimal single-layer model used to reproduce the conversion error.

    The only layer is a 3x3 convolution mapping 3 input channels to 64 output
    channels with stride 1 and padding 1.
    """

    def __init__(self):
        super().__init__()
        # Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        self.conv1 = nn.Conv2d(3, 64, 3, 1, 1)

    def forward(self, x):
        """Apply the single convolution to ``x`` and return the result."""
        return self.conv1(x)
def _main() -> None:
    """Reproduce the per-channel QAT -> TFLite conversion failure.

    Builds ``DummyModel``, rewrites it for quantization-aware training with
    ``"per_tensor": False``, runs one forward pass, converts the model to a
    quantized one, and finally hands it to ``TFLiteConverter``.
    """
    dummy_input = torch.rand(1, 3, 224, 224)

    # NOTE(review): the original paste lost its indentation; the extent of the
    # ``model_tracer()`` block below is reconstructed — confirm against the
    # author's script.
    with model_tracer():
        model = DummyModel()

        config = {
            "per_tensor": False
        }
        quantizer = QATQuantizer(model, dummy_input, work_dir='test', config=config)
        qat_model = quantizer.quantize()

    # Single forward pass through the QAT model before conversion.
    qat_model(dummy_input)

    with torch.no_grad():
        qat_model.eval()
        qat_model.cpu()

        qat_model = quantizer.convert(qat_model)

        torch.backends.quantized.engine = quantizer.backend

        # NOTE: with "per_tensor": False the converter needs
        # quantize_target_type='int8' (per the maintainer's reply in this
        # thread). It is intentionally omitted so the script still reproduces
        # the reported RuntimeError.
        converter = TFLiteConverter(qat_model, dummy_input, tflite_path='test/qat_model.tflite')
        converter.convert()
# Run the reproduction only when executed as a script, not on import.
if __name__ == '__main__':
    _main()
@spacycoder Hi, please read the comments in the example code [1][2].
That is, for "per_tensor": False, you will need to pass quantize_target_type='int8' to the TFLiteConverter.
Hi, I'm trying to run quantization-aware training with per-channel quantization. However, setting per_tensor to False gives me this error: This should reproduce the issue: