```python
# test_loader is built with batch_size = 1
miss_cnt = 0                                # samples where the two models disagree
result = torch.zeros(10, device=device)     # accumulated per-class |output| difference
correct_freeze = 0
correct_before_freeze = 0
for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    with torch.no_grad():
        output1 = model_freeze.quantize_inference(data)[0]  # frozen quantized path
        output2 = model_before_freeze(data)[0]              # float path before freeze
    result += (output1 - output2).abs()
    pred1 = output1.argmax().item()
    pred2 = output2.argmax().item()
    miss_cnt += int(pred1 != pred2)
    correct_freeze += int(pred1 == target.item())
    correct_before_freeze += int(pred2 == target.item())
result = result / len(test_loader.dataset)  # mean absolute difference per class
print(f'qat model correct {correct_freeze}\nori model correct {correct_before_freeze}')
print(f'qat miss ori {miss_cnt}')
print(f'diff {result}')
```
Output:

```
qat model correct 9881
ori model correct 9880
qat miss ori 7
diff tensor([0.0587, 0.0569, 0.0789, 0.0846, 0.0658, 0.0600, 0.0645, 0.0644, 0.0987, 0.0617], device='cuda:0')
```
Hello, I have two questions I'd like to ask. (I used the model and the MNIST training code as-is, without any changes.)

1. For the weights obtained after QAT, I found that even after dividing by the scale, they are still not floating-point numbers close to integers, so round() introduces a noticeable error. Is this something the QAT algorithm simply cannot achieve, or does it not actually matter whether the scaled weights end up close to integers?
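
To make the question concrete, this is the kind of check I mean. It is only a minimal sketch: `weight` and `scale` are placeholders for one quantized layer's weight tensor and its learned weight scale, not names from this repo:

```python
import torch

def weight_round_error(weight: torch.Tensor, scale: float) -> torch.Tensor:
    """Per-element residual |w/scale - round(w/scale)|.

    The residual is 0 when a weight quantizes exactly,
    and can be at most 0.5.
    """
    scaled = weight / scale
    return (scaled - scaled.round()).abs()

# Hypothetical usage: `conv` is one quantized layer, `scale` its weight scale.
# err = weight_round_error(conv.weight.detach(), scale)
# print(err.mean().item(), err.max().item())
```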
2. I added the code above at the end of quantization_aware_training.py. After finishing QAT, I wanted to compare the result of "direct inference before freeze" against "quantize_inference() after freeze".

I found that the gap between the two outputs is not small. Is this a meaningful comparison? In other words, should I expect the quantized model's final numerical outputs to also be close to the original model's?
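
For context, raw per-class logit differences can be hard to interpret on their own, so I also considered summarizing the gap with top-1 agreement and cosine similarity of the two output vectors. A sketch under the same setup as the snippet above (`model_freeze`, `model_before_freeze`, and `quantize_inference` are taken from that snippet; the helper name `compare_models` is mine):

```python
import torch
import torch.nn.functional as F

@torch.no_grad()
def compare_models(test_loader, model_freeze, model_before_freeze, device):
    """Report top-1 agreement and mean cosine similarity between
    the frozen quantized path and the float path (batch_size = 1)."""
    agree, cos_sum, n = 0, 0.0, 0
    for data, _ in test_loader:
        data = data.to(device)
        q_out = model_freeze.quantize_inference(data)[0]
        f_out = model_before_freeze(data)[0]
        agree += int(q_out.argmax().item() == f_out.argmax().item())
        cos_sum += F.cosine_similarity(q_out, f_out, dim=0).item()
        n += 1
    print(f'top-1 agreement: {agree}/{n}, '
          f'mean cosine similarity: {cos_sum / n:.4f}')
```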