Use paddle.nn.functional.cross_entropy to build a LabelSmoothingCrossEntropyLoss class, so that it can accept more arguments. I ran the unittest using the code below. In most cases the unittest passes, but in a few cases the two methods yield slightly different results, for example:

AssertionError: Tenso[21 chars]oat32, place=CPUPlace, stop_gradient=True, [2.33890343]) != Tenso[21 chars]oat32, place=CPUPlace, stop_gradient=True, [2.33890367])
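For reference, the two implementations should be mathematically identical (assuming the default uniform prior in F.label_smooth). With smoothed targets q = (1 - eps) * one_hot(t) + eps / C, the soft-label cross entropy expands per sample as

    sum_c q_c * (-log p_c) = (1 - eps) * (-log p_t) + (eps / C) * sum_c (-log p_c)
                           = confidence * nll_loss + smoothing * smooth_loss

which is exactly the formula in LabelSmoothingCrossEntropyLoss_old. So a mismatch in the seventh significant digit is just float32 rounding: the built-in cross_entropy kernel and the manual log_softmax + gather_nd path accumulate the same terms in different orders.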
import paddle
import paddle.nn.functional as F
import paddle.nn as nn
import unittest
import random

# decorator that re-runs the wrapped function `times` times
def repeat(times):
    def repeatHelper(f):
        def callHelper(*args):
            for i in range(0, times):
                f(*args)
        return callHelper
    return repeatHelper
class TestLabelSmoothingLoss(unittest.TestCase):
    @repeat(3)  # re-runs setUp itself, so only the last random draw is kept
    def setUp(self):
        N = 512
        C = 10  # num_classes
        smoothing = random.random()
        weight = paddle.rand([C])
        reduction = 'sum'
        name = 'smoothingCELoss'
        self.output = paddle.rand(shape=[N, C])
        self.label = paddle.randint(shape=[N], low=1, high=C)
        # old loss
        self.criterion0 = LabelSmoothingCrossEntropyLoss_old(smoothing=smoothing)
        # new loss
        self.criterion1 = LabelSmoothingCrossEntropyLoss(smoothing=smoothing)
        # new loss with weight & reduction & name
        self.criterion2 = LabelSmoothingCrossEntropyLoss(smoothing=smoothing,
                                                         weight=weight,
                                                         reduction=reduction,
                                                         name=name)

    def test_is_equal(self):
        self.assertEqual(self.criterion1(self.output, self.label),
                         self.criterion0(self.output, self.label))

    def test_with_para(self):
        self.assertIsInstance(self.criterion2(self.output, self.label), paddle.Tensor)
# new loss
class LabelSmoothingCrossEntropyLoss(nn.Layer):
    def __init__(self,
                 smoothing=0.1,
                 weight=None,
                 reduction='mean',
                 axis=-1,
                 use_softmax=True,
                 name=None):
        super().__init__()
        assert 0 <= smoothing < 1.0
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction
        self.axis = axis
        self.use_softmax = use_softmax
        self.name = name

    def forward(self, x, target):
        # turn hard labels into smoothed soft labels: (1 - eps) * one_hot + eps / C
        target = F.one_hot(target, num_classes=x.shape[1])
        target = F.label_smooth(target, epsilon=self.smoothing)
        loss = F.cross_entropy(x,
                               target,
                               weight=self.weight,
                               reduction=self.reduction,
                               soft_label=True,
                               axis=self.axis,
                               use_softmax=self.use_softmax,
                               name=self.name)
        return loss
# old loss
class LabelSmoothingCrossEntropyLoss_old(nn.Layer):
    def __init__(self, smoothing=0.1):
        super().__init__()
        assert 0 <= smoothing < 1.0
        self.smoothing = smoothing
        self.confidence = 1 - smoothing

    def forward(self, x, target):
        log_probs = F.log_softmax(x)  # [N, num_classes]
        # target_index is used to gather the log-prob of the true class for each of the N samples
        target_index = paddle.zeros([x.shape[0], 2], dtype='int64')  # [N, 2]
        target_index[:, 0] = paddle.arange(x.shape[0])
        target_index[:, 1] = target
        nll_loss = -log_probs.gather_nd(index=target_index)  # [N]
        smooth_loss = -log_probs.mean(axis=-1)
        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
        return loss.mean()
if __name__ == '__main__':
    paddle.set_device('cpu')
    unittest.main()
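Since the two paths accumulate in different orders, exact equality is too strict for float32, and a tolerance-based comparison avoids the spurious failures. Below is a minimal sketch; the helper name and tolerances are my own choices, and it assumes numpy is available:

import numpy as np

def assert_tensor_allclose(a, b, rtol=1e-5, atol=1e-6):
    # compare the two loss Tensors numerically instead of bitwise
    np.testing.assert_allclose(a.numpy(), b.numpy(), rtol=rtol, atol=atol)

test_is_equal can then call assert_tensor_allclose(self.criterion1(self.output, self.label), self.criterion0(self.output, self.label)) instead of self.assertEqual.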