Open theonly22 opened 4 years ago
# Truncation does introduce error; this variant scales the input before
# clamping to reduce that error (proposal from the issue discussion):
class activation_quantize_fn(nn.Module):
    """Quantize activations to ``a_bit`` bits (pass-through at 32 bits).

    Variant that multiplies the input by 0.1 before clamping to [0, 1],
    so large activations are compressed into range rather than hard-
    truncated, reducing the clipping error.

    NOTE: the original paste read ``torch.clamp(x 0.1, 0, 1)`` — the ``*``
    was eaten by markdown italics; restored here as ``x * 0.1``.
    """

    def __init__(self, a_bit):
        super(activation_quantize_fn, self).__init__()
        # Only low-bit (<= 8) or full-precision (32) activations supported.
        assert a_bit <= 8 or a_bit == 32
        self.a_bit = a_bit
        # uniform_quantize is defined elsewhere in this file — TODO confirm.
        self.uniform_q = uniform_quantize(k=a_bit)

    def forward(self, x):
        if self.a_bit == 32:
            # Full precision: identity pass-through.
            activation_q = x
        else:
            # Scale by 0.1 before truncating to [0, 1] to shrink the
            # clipping error, then uniformly quantize to a_bit bits.
            activation_q = self.uniform_q(torch.clamp(x * 0.1, 0, 1))
        return activation_q
变换到 (0,1) 区间以便使用通用的量化公式,由此引入的误差是可以接受的。
class activation_quantize_fn(nn.Module):
    """Quantize activations to ``a_bit`` bits (pass-through at 32 bits).

    Upstream variant: activations are hard-clamped to [0, 1] before
    uniform quantization (no pre-scaling).

    NOTE: the paste was cut off after the debug print — the trailing
    ``return activation_q`` is restored here, and the print is placed on
    the quantized branch (it is a no-op diagnostic on the fp32 path).
    """

    def __init__(self, a_bit):
        super(activation_quantize_fn, self).__init__()
        # Only low-bit (<= 8) or full-precision (32) activations supported.
        assert a_bit <= 8 or a_bit == 32
        self.a_bit = a_bit
        # uniform_quantize is defined elsewhere in this file — TODO confirm.
        self.uniform_q = uniform_quantize(k=a_bit)

    def forward(self, x):
        if self.a_bit == 32:
            # Full precision: identity pass-through.
            activation_q = x
        else:
            # Hard-clamp to [0, 1], then uniformly quantize to a_bit bits.
            activation_q = self.uniform_q(torch.clamp(x, 0, 1))
            # Debug: show the distinct quantization levels actually produced.
            print(np.unique(activation_q.detach().numpy()))
        return activation_q
特征值量化前强行归一到 (0,1),这一点不是很理解:有些特征值是比较大的,直接截断会造成误差。