experiencor / keras-yolo2

Easy training on custom dataset. Various backends (MobileNet and SqueezeNet) supported. A YOLO demo to detect raccoons, running entirely in the browser, is accessible at https://git.io/vF7vI (not on Windows).

About your loss function version, and switching it for another #464

Open kascesar opened 3 years ago

kascesar commented 3 years ago

Hello, I have studied your code for a long time, but I still don't understand why your loss function contains things I haven't seen in the YOLO / YOLOv2 papers. In particular, I noticed that you apply `tf.nn.sparse_softmax_cross_entropy_with_logits` to the class predictions, but I've seen other implementations that simply apply `tf.math.softmax` to them and then compute the squared error `tf.math.square(predicted_logits - true_logits)`. What do you think about this?
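For concreteness, here is a minimal sketch of the two class-loss variants I am comparing (the batch size, grid size, anchor count, and class count are made up for illustration):

```python
import tensorflow as tf

# made-up shapes: (batch, grid_h, grid_w, n_boxes, n_classes) raw logits,
# and integer class labels of shape (batch, grid_h, grid_w, n_boxes)
raw_class_logits = tf.random.normal((2, 13, 13, 5, 20))
true_class_ids = tf.random.uniform((2, 13, 13, 5), maxval=20, dtype=tf.int32)

# variant 1: cross-entropy straight on the raw logits
# (what keras-yolo2's loss uses)
ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=true_class_ids, logits=raw_class_logits)

# variant 2: softmax first, then squared error against one-hot targets
# (what I have seen in some other YOLOv2 re-implementations)
probs = tf.nn.softmax(raw_class_logits)
one_hot = tf.one_hot(true_class_ids, depth=20)
se = tf.reduce_sum(tf.square(one_hot - probs), axis=-1)
```

As far as I understand, both reduce to one value per box; the practical difference is that cross-entropy on logits keeps useful gradients even when the softmax saturates, while the squared-error variant flattens out there.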

Another question: would you recommend using a loss function like the one in the following code?

```python
import numpy as np
import tensorflow as tf


class Loss:
    def __init__(self, anchors, lambda_coord=5, lambda_noobj=1):
        self.anchors = anchors
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.priors = self.make_priors()
        self.first_run = True

    def center_grid(self, input_tensor):
        '''
        Build the (xi, yi) grid used to turn the predicted x_center,
        y_center offsets into absolute grid coordinates.

        parameters:
        -----------
        input_tensor : ndarray or tf.Tensor of shape
                       (batch_size, wg, hg, n_boxs, 4 + 1 + n_classes)

        return : ndarray of shape (batch_size, wg, hg, n_boxs, 2)
        '''
        batch_size, wg, hg, n_boxs = input_tensor.shape[:4]
        xg = np.arange(wg)
        yg = np.arange(hg)
        grid = np.meshgrid(xg, yg)
        grid_x = np.reshape(grid[0], (wg, hg, 1))  # only safe if wg == hg
        grid_y = np.reshape(grid[1], (wg, hg, 1))
        grid = np.stack((grid_x, grid_y), -1)  # (wg, hg, 1, 2)
        # tile over the batch and anchor dimensions;
        # float32 so it broadcasts cleanly with the float32 predictions
        return np.tile(grid, [batch_size, 1, 1, n_boxs, 1]).astype(np.float32)

    def make_priors(self):
        '''
        Reshape the anchors into priors used to rescale the raw w, h
        predictions.
        '''
        if len(self.anchors) % 2 != 0:
            raise ValueError('anchors must contain an even number of '
                             'values ((w, h) pairs)')
        n_boxs = len(self.anchors) // 2
        priors = np.reshape(self.anchors, (1, 1, 1, n_boxs, 2))
        return priors.astype(np.float32)

    def min_max_boxes(self, tensor_xy, tensor_wh):
        '''
        Compute the min and max corners of the boxes.
        '''
        half_wh = tensor_wh / 2.
        boxs_mins = tensor_xy - half_wh
        boxs_maxs = tensor_xy + half_wh

        return boxs_mins, boxs_maxs

    def compute_iou(self, tensor_xy_t, tensor_wh_t, tensor_xy_p, tensor_wh_p):
        '''
        Compute the IoU between true and predicted boxes (all at once).

        parameters:
        -----------
        *_t : true values
        *_p : predicted values

        return : tensor with one IoU value per box
        '''
        # intersection areas
        t_mins, t_maxs = self.min_max_boxes(tensor_xy_t, tensor_wh_t)
        p_mins, p_maxs = self.min_max_boxes(tensor_xy_p, tensor_wh_p)
        intersect_mins = tf.math.maximum(p_mins, t_mins)
        intersect_maxes = tf.math.minimum(p_maxs, t_maxs)
        intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        # union areas
        true_areas = tensor_wh_t[..., 0] * tensor_wh_t[..., 1]
        pred_areas = tensor_wh_p[..., 0] * tensor_wh_p[..., 1]
        union_areas = pred_areas + true_areas - intersect_areas

        # compute IoU
        iou = tf.math.truediv(intersect_areas, union_areas)

        return iou

    def xy_loss(self, true_xy, predict_xy, true_object):
        '''
        Compute the x, y coordinate loss (sum of squared errors, as in
        the paper), counted only in cells that contain an object.
        '''
        square = tf.math.square(true_xy - predict_xy)
        masked_square = square * true_object[..., None]
        loss = tf.math.reduce_sum(masked_square, axis=[1, 2, 3, 4])
        return loss

    def wh_loss(self, true_wh, predict_wh, true_object):
        '''
        Compute the squared error on the square roots of w, h, as in
        the paper.
        '''
        squared_sqrt = tf.square(tf.sqrt(true_wh) - tf.sqrt(predict_wh))
        masked_squared_sqrt = true_object[..., None] * squared_sqrt
        loss = tf.math.reduce_sum(masked_squared_sqrt, axis=[1, 2, 3, 4])
        return loss

    def object_loss(self, iou, true_object, predict_object):
        '''
        Explanation:
            the network predicts bounding boxes based on anchors, so the
        confidence target is the IoU between the predicted box and the
        ground truth, as in the YOLO papers.
        '''
        square = tf.math.square(iou - predict_object)
        masked_square = true_object * square
        loss = tf.math.reduce_sum(masked_square, axis=[1, 2, 3])
        return loss

    def no_obj_loss(self, iou, true_object, predict_object):
        '''
        Explanation:
            push the confidence towards zero only where there is no object
        and the best IoU across anchors is below 0.6. Remember that
        (1 - true_object) is a mask where 0 means object and 1 means
        no object.
        '''
        # definitions first
        highest_iou = tf.math.reduce_max(iou, axis=-1)
        mask = (tf.cast(highest_iou < 0.6, dtype=tf.float32)[..., None]
                * (1 - true_object))  # noobj mask
        # compute loss
        squared = tf.math.square(0 - predict_object)  # zero means no object
        masked_square = mask * squared
        loss = tf.math.reduce_sum(masked_square, axis=[1, 2, 3])
        return loss

    def class_loss(self, true_logits, predict_logits, true_object):
        # TODO:
        # test whether sparse_softmax_cross_entropy_with_logits works
        # well here, normalized as
        # tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
        # source: ( https://github.com/experiencor/keras-yolo2/blob/master/
        #           Yolo%20Step-by-Step.ipynb )

        squared = tf.square(true_logits - predict_logits)
        masked_square = true_object[..., None] * squared
        loss = tf.math.reduce_sum(masked_square, axis=[1, 2, 3, 4])
        return loss

    def compute_loss(self,
                     predict_xy, predict_wh, predict_object, predict_logits,
                     true_xy, true_wh, true_object, true_logits):
        iou = self.compute_iou(true_xy, true_wh, predict_xy, predict_wh)
        loss = (
            self.lambda_coord * self.xy_loss(true_xy, predict_xy, true_object)
            + self.lambda_coord * self.wh_loss(true_wh, predict_wh,
                                               true_object)
            + self.object_loss(iou, true_object, predict_object)
            + self.lambda_noobj * self.no_obj_loss(iou, true_object,
                                                   predict_object)
            + self.class_loss(true_logits, predict_logits, true_object))

        return loss

    def loss(self, y_pred, y_true):
        if y_pred.shape != y_true.shape:
            raise ValueError('y_pred and y_true must have the same shape')

        if self.first_run:
            self.first_run = False
            self.xy_grid = self.center_grid(y_pred)

        # adjust the predicted x, y, w, h, confidence and class scores
        predict_xy = tf.nn.sigmoid(y_pred[..., :2]) + self.xy_grid
        predict_wh = tf.math.exp(y_pred[..., 2:4]) * self.priors
        predict_object = tf.nn.sigmoid(y_pred[..., 4])
        predict_logits = tf.nn.softmax(y_pred[..., 5:])

        # true x, y, w, h, confidence and class scores
        true_xy = y_true[..., 0:2]
        true_wh = y_true[..., 2:4]
        true_object = y_true[..., 4]
        true_logits = y_true[..., 5:]
        loss = self.compute_loss(predict_xy, predict_wh,
                                 predict_object, predict_logits,
                                 true_xy, true_wh,
                                 true_object, true_logits)
        return loss
```
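
And in case it helps, this is roughly how I exercise the class with dummy tensors (the grid size, anchor values, and class count below are made up):

```python
# made-up setup: 13x13 grid, 5 anchors, 20 classes -> last dim = 4 + 1 + 20
anchors = [1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5]
loss_fn = Loss(anchors)

y_true = np.zeros((2, 13, 13, 5, 25), dtype=np.float32)
y_pred = np.random.randn(2, 13, 13, 5, 25).astype(np.float32)

# note: my loss takes (y_pred, y_true), while Keras passes (y_true, y_pred),
# so the argument order would have to be swapped before using model.compile
per_image_loss = loss_fn.loss(tf.constant(y_pred), tf.constant(y_true))
print(per_image_loss.shape)  # (2,) -- one value per image
```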

I hope you can help me with this doubt that has been tormenting me, thanks!

kascesar commented 3 years ago

I think the code above got mangled by the `__init__` def in the class (markdown ate the underscores).