ShannonAI / dice_loss_for_NLP

The repo contains the code of the ACL2020 paper `Dice Loss for Data-imbalanced NLP Tasks`
Apache License 2.0
272 stars 39 forks source link

dice loss疑问 #5

Open currylym opened 3 years ago

currylym commented 3 years ago

请问如果要复现mrc-ner模型下dice loss的效果,采用当前仓库dice loss的写法么🤔 看到 仓库下也有对应的实现,有点疑问咨询下大佬,感谢🙏

currylym commented 3 years ago

项目是tf版的,dice loss一直不收敛

def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    """Compute a dice-style loss and argmax predictions over flattened positions.

    This is the version quoted in the bug report (reported as not converging).

    Args:
        logits: Tensor that is reshaped to [-1, num_labels].
        labels: Tensor that is flattened to 1-D and one-hot encoded
            (presumably integer class ids -- confirm against the caller).
        num_labels: Size of the last axis of the reshaped logits and depth
            of the one-hot encoding.
        input_mask: Tensor cast to float32 and reshaped to [-1, 1]; it is
            multiplied into both the probabilities and the one-hot labels
            (presumably 1 for real tokens, 0 for padding -- confirm).
        alpha: Exponent applied to (1 - prob) when weighting probabilities.
        smooth: Constant added to the intersection sum and the denominator.

    Returns:
        Tuple (loss, predict): the scalar loss, and the argmax of the masked
        probabilities along the last axis.
    """
    # The [-1, 1] shape lets the mask broadcast across the label axis.
    input_mask = tf.cast(input_mask, dtype=tf.float32)
    input_mask = tf.reshape(input_mask, [-1, 1])

    # Element-wise sigmoid: each label column gets an independent probability.
    logits = tf.reshape(logits, [-1, num_labels])
    prob = tf.sigmoid(logits)
    prob = prob * input_mask

    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    one_hot_labels = one_hot_labels * input_mask

    # NOTE(review): smooth is added here, before the factor of 2 applied on
    # the loss line, so the numerator effectively gains 2 * smooth while the
    # denominator gains only smooth. When both sums are zero the loss
    # evaluates to 1 - 2 = -1.
    intersection = tf.reduce_sum(
        (1 - prob) ** alpha * prob * one_hot_labels) + smooth
    denominator = tf.reduce_sum(
        (1 - prob) ** alpha * prob) + tf.reduce_sum(one_hot_labels) + smooth

    loss = 1 - 2 * intersection / denominator
    # The argmax is taken over the masked sigmoid outputs.
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict
xiaoya-li commented 3 years ago
prob = tf.sigmoid(logits)

您好,感谢提问。按照我的理解:如果是多分类任务,`prob = tf.sigmoid(logits)` 应改为 `prob = tf.nn.softmax(logits)`,对应的预测保持 `predict = tf.math.argmax(prob, axis=-1)`;如果是二分类任务,保留 `prob = tf.sigmoid(logits)`,而 `predict = tf.math.argmax(prob, axis=-1)` 应改为 `predict = tf.cast(tf.math.greater(prob, 0.5), tf.int32)`。

另外,将 `denominator = tf.reduce_sum((1 - prob) ** alpha * prob) + tf.reduce_sum(one_hot_labels) + smooth` 改为 `denominator = tf.reduce_sum(tf.math.square((1 - prob) ** alpha * prob)) + tf.reduce_sum(one_hot_labels) + smooth`,同样可以使训练过程的收敛变快。

如果我的理解不对,请您指出。 感谢!

xiaoya-li commented 3 years ago

请问如果要复现mrc-ner模型下dice loss的效果,采用当前仓库dice loss的写法么🤔 看到 仓库下也有对应的实现,有点疑问咨询下大佬,感谢🙏

您好,感谢提问。 请参照当前repository的dice loss实现。

currylym commented 3 years ago
prob = tf.sigmoid(logits)

您好,感谢提问。按照我的理解:如果是多分类任务,`prob = tf.sigmoid(logits)` 应改为 `prob = tf.nn.softmax(logits)`,对应的预测保持 `predict = tf.math.argmax(prob, axis=-1)`;如果是二分类任务,保留 `prob = tf.sigmoid(logits)`,而 `predict = tf.math.argmax(prob, axis=-1)` 应改为 `predict = tf.cast(tf.math.greater(prob, 0.5), tf.int32)`。

如果我的理解不对,请您指出。 感谢!


currylym commented 3 years ago

项目是tf版的,dice loss一直不收敛

def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    """Compute a dice-style loss and argmax predictions over flattened positions.

    Quoted copy of the originally reported (non-converging) implementation.

    Args:
        logits: Tensor that is reshaped to [-1, num_labels].
        labels: Tensor that is flattened to 1-D and one-hot encoded
            (presumably integer class ids -- confirm against the caller).
        num_labels: Size of the last axis of the reshaped logits and depth
            of the one-hot encoding.
        input_mask: Tensor cast to float32 and reshaped to [-1, 1]; it is
            multiplied into both the probabilities and the one-hot labels
            (presumably 1 for real tokens, 0 for padding -- confirm).
        alpha: Exponent applied to (1 - prob) when weighting probabilities.
        smooth: Constant added to the intersection sum and the denominator.

    Returns:
        Tuple (loss, predict): the scalar loss, and the argmax of the masked
        probabilities along the last axis.
    """
    # Reshape the mask to a column so it broadcasts over the label axis.
    input_mask = tf.cast(input_mask, dtype=tf.float32)
    input_mask = tf.reshape(input_mask, [-1, 1])

    # Sigmoid is applied element-wise, independently per label column.
    logits = tf.reshape(logits, [-1, num_labels])
    prob = tf.sigmoid(logits)
    prob = prob * input_mask

    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    one_hot_labels = one_hot_labels * input_mask

    # NOTE(review): because smooth is folded into `intersection` here and
    # the loss line then multiplies `intersection` by 2, the numerator
    # receives 2 * smooth but the denominator only smooth; with empty sums
    # the loss becomes 1 - 2 = -1.
    intersection = tf.reduce_sum(
        (1 - prob) ** alpha * prob * one_hot_labels) + smooth
    denominator = tf.reduce_sum(
        (1 - prob) ** alpha * prob) + tf.reduce_sum(one_hot_labels) + smooth

    loss = 1 - 2 * intersection / denominator
    # Predictions come from the masked sigmoid outputs.
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict


def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    """Return a smoothed dice-style loss and per-position argmax predictions.

    Args:
        logits: Tensor that is reshaped to [-1, num_labels].
        labels: Tensor that is flattened to 1-D and one-hot encoded with
            depth ``num_labels``.
        num_labels: Width of the label axis.
        input_mask: Tensor cast to float32 and reshaped to [-1, 1]; it scales
            both the probabilities and the one-hot targets
            (presumably 1 for real tokens, 0 for padding -- confirm).
        alpha: Exponent of the ``(1 - prob)`` weighting factor.
        smooth: Constant added once to the doubled intersection and once to
            the denominator.

    Returns:
        Tuple (loss, predict): the scalar loss, and the argmax of the masked
        probabilities along the last axis.
    """
    # Column-shaped float mask so it broadcasts across the label axis.
    mask = tf.reshape(tf.cast(input_mask, dtype=tf.float32), [-1, 1])

    # Independent per-label probabilities, scaled by the mask.
    prob = tf.sigmoid(tf.reshape(logits, [-1, num_labels])) * mask

    flat_labels = tf.reshape(labels, [-1])
    targets = tf.one_hot(flat_labels, depth=num_labels, dtype=tf.float32) * mask

    # The (1 - p) ** alpha * p term is shared by numerator and denominator.
    weighted_prob = (1 - prob) ** alpha * prob
    overlap = tf.reduce_sum(weighted_prob * targets)
    total = tf.reduce_sum(weighted_prob) + tf.reduce_sum(targets)

    loss = 1 - (2 * overlap + smooth) / (total + smooth)
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict
cjymz886 commented 3 years ago

项目是tf版的,dice loss一直不收敛

def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    """Compute a dice-style loss and argmax predictions over flattened positions.

    Quoted copy of the originally reported (non-converging) implementation.

    Args:
        logits: Tensor that is reshaped to [-1, num_labels].
        labels: Tensor that is flattened to 1-D and one-hot encoded
            (presumably integer class ids -- confirm against the caller).
        num_labels: Size of the last axis of the reshaped logits and depth
            of the one-hot encoding.
        input_mask: Tensor cast to float32 and reshaped to [-1, 1]; it is
            multiplied into both the probabilities and the one-hot labels
            (presumably 1 for real tokens, 0 for padding -- confirm).
        alpha: Exponent applied to (1 - prob) when weighting probabilities.
        smooth: Constant added to the intersection sum and the denominator.

    Returns:
        Tuple (loss, predict): the scalar loss, and the argmax of the masked
        probabilities along the last axis.
    """
    # Mask as a float column vector; broadcasts over the label axis below.
    input_mask = tf.cast(input_mask, dtype=tf.float32)
    input_mask = tf.reshape(input_mask, [-1, 1])

    # Element-wise sigmoid over the flattened logits.
    logits = tf.reshape(logits, [-1, num_labels])
    prob = tf.sigmoid(logits)
    prob = prob * input_mask

    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    one_hot_labels = one_hot_labels * input_mask

    # NOTE(review): smooth is included in `intersection` before it is
    # doubled on the loss line, so the numerator carries 2 * smooth and the
    # denominator only smooth. With zero sums the loss is 1 - 2 = -1.
    intersection = tf.reduce_sum(
        (1 - prob) ** alpha * prob * one_hot_labels) + smooth
    denominator = tf.reduce_sum(
        (1 - prob) ** alpha * prob) + tf.reduce_sum(one_hot_labels) + smooth

    loss = 1 - 2 * intersection / denominator
    # Argmax over the masked sigmoid outputs.
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict


def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    """Compute a smoothed dice-style loss plus argmax predictions.

    Args:
        logits: Tensor that is reshaped to [-1, num_labels].
        labels: Tensor that is flattened to 1-D and one-hot encoded with
            depth ``num_labels``.
        num_labels: Width of the label axis.
        input_mask: Tensor cast to float32 and reshaped to [-1, 1], then
            multiplied into the probabilities and the one-hot targets
            (presumably 1 for real tokens, 0 for padding -- confirm).
        alpha: Exponent of the ``(1 - prob)`` weighting factor.
        smooth: Constant added to both the doubled intersection and the
            denominator.

    Returns:
        Tuple (loss, predict): the scalar loss, and the argmax of the masked
        probabilities along the last axis.
    """
    float_mask = tf.cast(input_mask, dtype=tf.float32)
    column_mask = tf.reshape(float_mask, [-1, 1])

    flat_logits = tf.reshape(logits, [-1, num_labels])
    prob = tf.sigmoid(flat_logits) * column_mask

    one_hot_targets = tf.one_hot(
        tf.reshape(labels, [-1]), depth=num_labels, dtype=tf.float32)
    one_hot_targets = one_hot_targets * column_mask

    # Weighting factor (1 - p) ** alpha, applied to p in both sums.
    decay = (1 - prob) ** alpha
    scaled_prob = decay * prob

    numerator = 2 * tf.reduce_sum(scaled_prob * one_hot_targets) + smooth
    normalizer = (
        tf.reduce_sum(scaled_prob) + tf.reduce_sum(one_hot_targets) + smooth)

    loss = 1 - numerator / normalizer
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict


xiaoya-li commented 3 years ago

您好,将dice-loss应用到二分类任务,num_labels是1,logits维度是(batch_size, 1)

YimianDai commented 2 years ago

项目是 tf 版的,dice loss 一直不收敛

def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    """Compute a dice-style loss and argmax predictions over flattened positions.

    Quoted copy of the originally reported (non-converging) implementation.

    Args:
        logits: Tensor that is reshaped to [-1, num_labels].
        labels: Tensor that is flattened to 1-D and one-hot encoded
            (presumably integer class ids -- confirm against the caller).
        num_labels: Size of the last axis of the reshaped logits and depth
            of the one-hot encoding.
        input_mask: Tensor cast to float32 and reshaped to [-1, 1]; it is
            multiplied into both the probabilities and the one-hot labels
            (presumably 1 for real tokens, 0 for padding -- confirm).
        alpha: Exponent applied to (1 - prob) when weighting probabilities.
        smooth: Constant added to the intersection sum and the denominator.

    Returns:
        Tuple (loss, predict): the scalar loss, and the argmax of the masked
        probabilities along the last axis.
    """
    # Float column mask; broadcasts against [-1, num_labels] tensors.
    input_mask = tf.cast(input_mask, dtype=tf.float32)
    input_mask = tf.reshape(input_mask, [-1, 1])

    # Each label column gets its own sigmoid probability.
    logits = tf.reshape(logits, [-1, num_labels])
    prob = tf.sigmoid(logits)
    prob = prob * input_mask

    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    one_hot_labels = one_hot_labels * input_mask

    # NOTE(review): adding smooth here means the later `2 * intersection`
    # contributes 2 * smooth to the numerator while the denominator gets
    # only smooth; if both sums are zero the loss evaluates to 1 - 2 = -1.
    intersection = tf.reduce_sum(
        (1 - prob) ** alpha * prob * one_hot_labels) + smooth
    denominator = tf.reduce_sum(
        (1 - prob) ** alpha * prob) + tf.reduce_sum(one_hot_labels) + smooth

    loss = 1 - 2 * intersection / denominator
    # Argmax is taken over the masked sigmoid outputs.
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict

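知道问题了,代码中 smooth 加的位置有误:原实现先把 smooth 加到 intersection 上再乘以 2,分子实际加了 2·smooth,而分母只加了 smooth。应改为 `loss = 1 - (2 * intersection + smooth) / (denominator + smooth)`,如下: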

def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    """Smoothed dice-style loss with argmax predictions.

    The smoothing constant is added once to the doubled intersection and once
    to the denominator.

    Args:
        logits: Tensor that is reshaped to [-1, num_labels].
        labels: Tensor that is flattened to 1-D and one-hot encoded with
            depth ``num_labels``.
        num_labels: Width of the label axis.
        input_mask: Tensor cast to float32 and reshaped to [-1, 1]; it
            multiplies both the probabilities and the one-hot targets
            (presumably 1 for real tokens, 0 for padding -- confirm).
        alpha: Exponent of the ``(1 - prob)`` weighting factor.
        smooth: Smoothing constant.

    Returns:
        Tuple (loss, predict): the scalar loss, and the argmax of the masked
        probabilities along the last axis.
    """
    # Build the targets first; they are independent of the logits.
    keep = tf.reshape(tf.cast(input_mask, dtype=tf.float32), [-1, 1])
    label_ids = tf.reshape(labels, [-1])
    gold = tf.one_hot(label_ids, depth=num_labels, dtype=tf.float32) * keep

    # Element-wise sigmoid probabilities, scaled by the mask.
    scores = tf.reshape(logits, [-1, num_labels])
    prob = tf.sigmoid(scores) * keep

    adjusted = (1 - prob) ** alpha * prob
    hit_mass = tf.reduce_sum(adjusted * gold)
    all_mass = tf.reduce_sum(adjusted) + tf.reduce_sum(gold)

    dice = (2 * hit_mass + smooth) / (all_mass + smooth)
    loss = 1 - dice
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict
