class SigmoidFocalClassificationLoss(nn.Module):
"""
Sigmoid focal cross entropy loss.
"""
def __init__(self, gamma: float = 2.0, alpha: float = 0.25):
"""
Args:
gamma: Weighting parameter to balance loss for hard and easy examples.
alpha: Weighting parameter to balance loss for positive and negative examples.
"""
super(SigmoidFocalClassificationLoss, self).__init__()
self.alpha = alpha
self.gamma = gamma
@staticmethod
def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor):
""" PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits:
max(x, 0) - x * z + log(1 + exp(-abs(x))) in
https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits
Args:
input: (B, #anchors, #classes) float tensor.
Predicted logits for each class
target: (B, #anchors, #classes) float tensor.
One-hot encoded classification targets
Returns:
loss: (B, #anchors, #classes) float tensor.
Sigmoid cross entropy loss without reduction
"""
loss = torch.clamp(input, min=0) - input * target + \
torch.log1p(torch.exp(-torch.abs(input)))
return loss
def forward(self, input: torch.Tensor, target: torch.Tensor, weights: torch.Tensor):
"""
Args:
input: (B, #anchors, #classes) float tensor.
Predicted logits for each class
target: (B, #anchors, #classes) float tensor.
One-hot encoded classification targets
weights: (B, #anchors) float tensor.
Anchor-wise weights.
Returns:
weighted_loss: (B, #anchors, #classes) float tensor after weighting.
"""
pred_sigmoid = torch.sigmoid(input)
alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha)
pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid
focal_weight = alpha_weight * torch.pow(pt, self.gamma)
bce_loss = self.sigmoid_cross_entropy_with_logits(input, target)
loss = focal_weight * bce_loss
if weights.shape.__len__() == 2 or \
(weights.shape.__len__() == 1 and target.shape.__len__() == 2):
weights = weights.unsqueeze(-1)
assert weights.shape.__len__() == loss.shape.__len__()
return loss * weights
Here are codes for focal loss. However, the computation of pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid seems to have a mistake. It should be pt = target * pred_sigmoid + (1.0 - target) * (1.0 - pred_sigmoid), am I right?
Here are codes for focal loss. However, the computation of
pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid
seems to have a mistake. It should bept = target * pred_sigmoid + (1.0 - target) * (1.0 - pred_sigmoid)
, am I right?