currylym opened 3 years ago
The project is the TF version; the dice loss never converges.
def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    input_mask = tf.cast(input_mask, dtype=tf.float32)
    input_mask = tf.reshape(input_mask, [-1, 1])
    logits = tf.reshape(logits, [-1, num_labels])
    prob = tf.sigmoid(logits)
    prob = prob * input_mask
    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    one_hot_labels = one_hot_labels * input_mask
    intersection = tf.reduce_sum(
        (1 - prob) ** alpha * prob * one_hot_labels) + smooth
    denominator = tf.reduce_sum(
        (1 - prob) ** alpha * prob) + tf.reduce_sum(one_hot_labels) + smooth
    loss = 1 - 2 * intersection / denominator
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict
Hello, thanks for the question.
As I understand it, for a multi-class task:
prob = tf.sigmoid(logits)
should be
prob = tf.nn.softmax(logits)
with the corresponding predict = tf.math.argmax(prob, axis=-1).
For a binary task, keep
prob = tf.sigmoid(logits)
but the corresponding
predict = tf.math.argmax(prob, axis=-1)
should be
predict = tf.cast(tf.math.greater(prob, 0.5), tf.int32)
Also, changing
denominator = tf.reduce_sum((1 - prob) ** alpha * prob) + tf.reduce_sum(one_hot_labels) + smooth
into
denominator = tf.reduce_sum(tf.math.square((1 - prob) ** alpha * prob)) + tf.reduce_sum(one_hot_labels) + smooth
can likewise make training converge faster.
If my understanding is wrong, please point it out. Thanks!
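Putting the two suggested prediction heads side by side (a sketch only, using the same logits shape (batch_size, num_labels) as dice_layer above; the helper names are illustrative, not from the repository):

import tensorflow as tf

# Multi-class: normalize the scores across labels, then take the argmax.
def multiclass_head(logits):
    prob = tf.nn.softmax(logits, axis=-1)
    predict = tf.math.argmax(prob, axis=-1)
    return prob, predict

# Binary: keep the per-logit sigmoid, but predict by thresholding at 0.5
# instead of argmax (argmax over a single column is always 0).
def binary_head(logits):
    prob = tf.sigmoid(logits)
    predict = tf.cast(tf.math.greater(prob, 0.5), tf.int32)
    return prob, predict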
If I want to reproduce the dice loss results of the mrc-ner model, should I use the dice loss implementation in this repository? 🤔 I noticed there is also a corresponding implementation at https://github.com/ShannonAI/mrc-for-flat-nested-ner/blob/master/loss/adaptive_dice_loss.py, so I wanted to ask. Thanks 🙏
Hello, thanks for the question. Please refer to the dice loss implementation in the current repository.
Thanks for the reply! My case is binary classification; I'll try the denominator rewrite 🙏
Found the problem: smooth was added in the wrong place in the code.
def dice_layer(logits, labels, num_labels, input_mask, alpha=0.01, smooth=1):
    input_mask = tf.cast(input_mask, dtype=tf.float32)
    input_mask = tf.reshape(input_mask, [-1, 1])
    logits = tf.reshape(logits, [-1, num_labels])
    prob = tf.sigmoid(logits)
    prob = prob * input_mask
    labels = tf.reshape(labels, [-1])
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    one_hot_labels = one_hot_labels * input_mask
    intersection = tf.reduce_sum(
        (1 - prob) ** alpha * prob * one_hot_labels)
    denominator = tf.reduce_sum(
        (1 - prob) ** alpha * prob) + tf.reduce_sum(one_hot_labels)
    loss = 1 - (2 * intersection + smooth) / (denominator + smooth)
    predict = tf.math.argmax(prob, axis=-1)
    return loss, predict
Hello, for binary classification, should num_labels be 2 or 1? And what is the shape of logits?
Hello, when applying dice loss to a binary classification task, num_labels is 1 and logits has shape (batch_size, 1).
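For concreteness, a minimal call for that binary setup (a sketch: the shapes and values are illustrative, and dice_layer is the fixed version posted above):

import tensorflow as tf

batch_size = 4
logits = tf.random.normal([batch_size, 1])           # (batch_size, 1)
labels = tf.constant([0, 1, 1, 0])                   # one 0/1 label per example
input_mask = tf.ones([batch_size], dtype=tf.int32)   # all positions valid

loss, predict = dice_layer(logits, labels, num_labels=1, input_mask=input_mask)
# Note: with a single column, the argmax predict is always 0; for binary
# decisions use the threshold predict from the reply above,
# tf.cast(tf.math.greater(prob, 0.5), tf.int32).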
What is the difference between these two versions (smooth added to each sum versus added outside the factor of 2)? Why does just moving it make the loss converge?
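A quick check on a degenerate case shows why the placement matters (a sketch for intuition, not an authoritative derivation): when a batch has no positive labels and the probabilities are near zero, both sums vanish and only smooth remains.

import tensorflow as tf

# Degenerate batch: no positives, prob ~ 0, so both raw sums are ~0.
intersection = tf.constant(0.0)
denominator = tf.constant(0.0)
smooth = 1.0

# Original placement: smooth enters each sum, then the numerator is
# doubled, so a "perfect" prediction scores 1 - 2*1/1 = -1. The loss
# is not bounded below by 0, and the smooth term is double-counted
# in the numerator relative to the denominator.
loss_original = 1 - 2 * (intersection + smooth) / (denominator + smooth)

# Fixed placement: smooth is added after the factor of 2, so the same
# prediction scores 1 - 1/1 = 0, the usual minimum of a dice loss.
loss_fixed = 1 - (2 * intersection + smooth) / (denominator + smooth)

print(loss_original.numpy(), loss_fixed.numpy())  # -1.0  0.0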