lufficc / SSD

High quality, fast, modular reference implementation of SSD in PyTorch

Log evaluation loss while testing model #164

Closed priteshgohil closed 4 years ago

priteshgohil commented 4 years ago

I would love to see the loss on the test set while evaluating the model during training. I have modified the code to return the loss when testing:

class SSDDetector(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.backbone = build_backbone(cfg)
        self.box_head = build_box_head(cfg)

    def forward(self, images, targets=None):
        features = self.backbone(images)
        detections, detector_losses = self.box_head(features, targets)
        if self.training:
            return detector_losses
        # modified: also return the losses alongside the detections at eval/test time
        return detections, detector_losses

However, detector_losses is empty during evaluation/validation. Your suggestions would be most helpful.

lufficc commented 4 years ago

SSD won't compute the losses when testing, since it's unnecessary and time consuming. But if you just want to see them, here are some suggestions:

  1. compute targets when testing by passing target_transform: https://github.com/lufficc/SSD/blob/master/ssd/data/build.py#L32 (see the sketch after this list)

  2. remember to pass them to the model: https://github.com/lufficc/SSD/blob/master/ssd/engine/inference.py#L43

  3. compute the loss when testing here: https://github.com/lufficc/SSD/blob/master/ssd/modeling/box_head/box_head.py#L39, following https://github.com/lufficc/SSD/blob/master/ssd/modeling/box_head/box_head.py#L31-L34
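
A minimal sketch of step 1, assuming make_data_loader in ssd/data/build.py currently builds target_transform only when is_train is true (function and variable names follow the linked file but are not verified against the current commit):

# ssd/data/build.py -- inside make_data_loader (sketch)
# before: targets are only encoded for the training loader
#   target_transform = build_target_transform(cfg) if is_train else None
# after: always build it, so the test loader also encodes ground-truth boxes/labels
target_transform = build_target_transform(cfg)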

priteshgohil commented 4 years ago

Thanks @lufficc, it worked. I want to add one more step:

  1. Return the targets value in the test branch too, as is done under self.is_train (L20): https://github.com/lufficc/SSD/blob/50373c79b861d5d239be4206fafc6661cea040b4/ssd/data/build.py#L26 (a sketch follows below)
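
For reference, a rough sketch of that change in the BatchCollator, assuming the collator currently builds targets only when self.is_train is true and returns None otherwise (names follow the linked file but are not verified against this commit):

# ssd/data/build.py -- BatchCollator.__call__ (sketch)
# build the targets Container unconditionally instead of only when self.is_train is True,
# so evaluation batches also carry the ground-truth boxes/labels
list_targets = transposed_batch[1]
targets = Container(
    {key: default_collate([d[key] for d in list_targets]) for key in list_targets[0]}
)
return images, targets, img_ids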

I have one more question: the classification loss seems OK (avg. 13), but the regression loss is very high (avg. 40,000). I calculate the loss exactly the same way as in trainer.py, using the reduce_loss_dict function. Any hint as to what is going wrong?

priteshgohil commented 4 years ago

Following are the losses I get from the log. The training loss is fine, but I'm not sure what's wrong with the validation loss. I checked the inference results in results__00xxxx.txt and they look good. The validation loss is logged every 25 iterations, and the model is trained on the VOC2007 trainval split.

[screenshots: train_losses and val_losses plots]

Following are the changes in trainer.py:

if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
    if summary_writer:
        eval_results = do_evaluation(cfg, model, distributed=args.distributed, summary_writer=summary_writer, iteration=iteration)
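
For this to work, do_evaluation has to forward the extra keyword arguments down to inference. A minimal sketch, assuming do_evaluation in ssd/engine/inference.py already accepts **kwargs and passes them through (not verified against this commit):

# ssd/engine/inference.py -- do_evaluation (sketch)
@torch.no_grad()
def do_evaluation(cfg, model, distributed, **kwargs):
    ...
    # summary_writer and iteration arrive here via **kwargs and are
    # forwarded unchanged to inference() for each test dataset
    eval_result = inference(model, data_loader, dataset_name, device, output_folder, **kwargs)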

In inference.py:

def compute_on_dataset(model, data_loader, device, summary_writer, iteration):
    results_dict = {}
    total_loss = []
    cls_loss = []
    reg_loss = []
    for batch in tqdm(data_loader):
        images, targets, image_ids = batch
        cpu_device = torch.device("cpu")
        with torch.no_grad():
            outputs, loss_dict = model(images.to(device), targets=targets.to(device))

            #print("loss_dict: {}".format(loss_dict))
            loss = sum(loss for loss in loss_dict.values())
            # reduce losses over all GPUs for logging purposes
            l_dict_reduced = reduce_loss_dict(loss_dict)
            l_reduced = sum(loss for loss in l_dict_reduced.values())

            outputs = [o.to(cpu_device) for o in outputs]
        total_loss.append(l_reduced)
        cls_loss.append(l_dict_reduced['cls_loss'])
        reg_loss.append(l_dict_reduced['reg_loss'])
        results_dict.update(
            {img_id: result for img_id, result in zip(image_ids, outputs)}
        )
    print("total_losses: {} \nSum: {} entries: {}".format(total_loss,sum(total_loss),len(total_loss)))
    print("loss: {}".format(loss))
    print("final loss_dict: {}".format(loss_dict))
    # Log losses
    if summary_writer:
        global_step = iteration
        summary_writer.add_scalar('val_losses/total_loss', sum(total_loss)/len(total_loss), global_step=global_step)
        summary_writer.add_scalar('val_losses/cls_loss', sum(cls_loss) / len(cls_loss), global_step=global_step)
        summary_writer.add_scalar('val_losses/reg_loss', sum(reg_loss) / len(reg_loss), global_step=global_step)
        print("Debug: cls vs reg loss {}: {}".format(cls_loss, reg_loss))
    return results_dict

def inference(model, data_loader, dataset_name, device, output_folder=None, use_cached=False, **kwargs):
    dataset = data_loader.dataset
    logger = logging.getLogger("SSD.inference")
    logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(dataset)))
    predictions_path = os.path.join(output_folder, 'predictions.pth')
    summary_writer = kwargs['summary_writer']
    del kwargs['summary_writer']
    iteration = kwargs['iteration']
    if use_cached and os.path.exists(predictions_path):
        logger.info("Using saved model predictions.pth at {}:".format(output_folder))
        predictions = torch.load(predictions_path, map_location='cpu')
    else:
        predictions = compute_on_dataset(model, data_loader, device, summary_writer, iteration)
        synchronize()
        predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return
    if output_folder:
        torch.save(predictions, predictions_path)
    return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder, **kwargs)

Edit in box_head.py:

    def _forward_test(self, cls_logits, bbox_pred, targets):
        # Add loss
        gt_boxes, gt_labels = targets['boxes'], targets['labels']
        reg_loss, cls_loss = self.loss_evaluator(cls_logits, bbox_pred, gt_labels, gt_boxes)
        loss_dict = dict(
            reg_loss=reg_loss,
            cls_loss=cls_loss,
        )
        # Convert detections
        if self.priors is None:
            self.priors = PriorBox(self.cfg)().to(bbox_pred.device)
        scores = F.softmax(cls_logits, dim=2)
        boxes = box_utils.convert_locations_to_boxes(
            bbox_pred, self.priors, self.cfg.MODEL.CENTER_VARIANCE, self.cfg.MODEL.SIZE_VARIANCE
        )
        boxes = box_utils.center_form_to_corner_form(boxes)
        detections = (scores, boxes)
        detections = self.post_processor(detections)
        return detections, loss_dict

priteshgohil commented 4 years ago

Finally, I can get the validation loss after the following edit. The reason was that the network outputs normalized bounding boxes, while the dataloader output (target labels) was not normalized, so the target coordinate values were far too large. That is why the regression loss value was huge.

Following are the additional changes in https://github.com/lufficc/SSD/blob/50373c79b861d5d239be4206fafc6661cea040b4/ssd/data/transforms/__init__.py#L20:

    else:
        transform = [
            ConvertFromInts(), # Convert img to float32
            ToPercentCoords(), # Normalize BBox Cords
            Resize(cfg.INPUT.IMAGE_SIZE),
            SubtractMeans(cfg.INPUT.PIXEL_MEAN),
            ToTensor()
        ]
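
For context, ToPercentCoords divides the ground-truth box coordinates by the image width and height, so the targets end up in the same normalized [0, 1] space as the box regression the network is trained on. A rough sketch of what that transform typically looks like in this codebase (not verified against the current commit):

# ssd/data/transforms/transforms.py -- ToPercentCoords (sketch)
class ToPercentCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        # convert absolute pixel coordinates to relative [0, 1] coordinates
        height, width, _ = image.shape
        boxes[:, 0] /= width   # xmin
        boxes[:, 1] /= height  # ymin
        boxes[:, 2] /= width   # xmax
        boxes[:, 3] /= height  # ymax
        return image, boxes, labels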

And the validation loss looks good:

[validation loss plot]

lufficc commented 4 years ago

Great job. I missed this point...