The predicted landmarks appear out of face box

try-agaaain commented 2 years ago

To shorten face-detection time, I predict the face box of the current frame from the landmarks of the previous frame. In some cases, the landmarks predicted by PIPNet appear outside my face box.

In the image below, the area in the red box is the image I fed into the network, and the green dots are the landmarks predicted by PIPNet.

Some of these landmarks appear outside the box, does anyone know why?

try-agaaain commented 2 years ago

Here's part of my code

def face_feature(image, cfg, net, device, box=None, use_face_boxes_detector=False):
    """Predict facial landmarks for one face in ``image`` with a PIPNet model.

    The face region is either taken from ``box`` or found with
    ``FaceBoxesDetector``. The region is clamped to the image, cropped,
    resized to ``cfg.input_size`` and passed through ``forward_pip``; the
    normalized predictions are then mapped back to image coordinates.

    Side effects: landmark dots (and, when ``box`` is used, the face
    rectangle) are drawn onto ``image`` in place, and the result is shown in
    an OpenCV window that blocks until a key is pressed.

    Args:
        image: Array whose ``shape`` unpacks to (height, width, channels).
            The channel axis is reversed before building the RGB input, so
            this is presumably a BGR OpenCV image -- confirm with the caller.
        cfg: Config object; ``input_size``, ``use_gpu``, ``net_stride``,
            ``num_nb``, ``num_lms`` and ``data_name`` are read.
        net: Network passed through to ``forward_pip``.
        device: Device the input tensor is moved to.
        box: Face box as ``[xmin, ymin, xmax, ymax]`` with exclusive max
            coordinates. Ignored when ``use_face_boxes_detector`` is true.
        use_face_boxes_detector: Detect the face box instead of using ``box``.

    Returns:
        ``np.ndarray`` with one ``[x, y]`` row per landmark, in image pixels.

    Raises:
        ValueError: If no box is available, no face is detected, or the box
            lies entirely outside the image.
    """
    # box may be empty when the FaceBoxes detector is used; otherwise it
    # must be non-empty. Checked first, with a real exception rather than an
    # assert, so the check survives ``python -O`` and carries a message.
    has_box = box is not None and len(box) != 0
    if not has_box and not use_face_boxes_detector:
        raise ValueError('You must pass a face box that is not empty, '
                         'or set use_face_boxes_detector=True to detect face box')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    preprocess = transforms.Compose([
        transforms.Resize((cfg.input_size, cfg.input_size)),
        transforms.ToTensor(),
        normalize,
    ])

    image_height, image_width, _ = image.shape
    if use_face_boxes_detector:
        detector = FaceBoxesDetector('FaceBoxes', '../FaceBoxesV2/weights/FaceBoxesV2.pth', cfg.use_gpu, device)
        my_thresh = 0.6
        detections, _ = detector.detect(image, my_thresh, 1)
        if len(detections) < 1:
            # Previously this fell through and failed later with an
            # UnboundLocalError on det_xmin.
            raise ValueError('No face was detected in the image')
        # Detector boxes are (xmin, ymin, width, height).
        det_box = detections[0][2:]
        det_xmin = det_box[0]
        det_ymin = det_box[1]
        det_xmax = det_xmin + det_box[2] - 1
        det_ymax = det_ymin + det_box[3] - 1
    else:
        det_xmin = box[0]
        det_ymin = box[1]
        det_xmax = box[2] - 1
        det_ymax = box[3] - 1

    # Clamp to the image: a box extrapolated from previous-frame landmarks
    # can leave the frame, and negative slice starts would wrap around.
    det_xmin = max(int(det_xmin), 0)
    det_ymin = max(int(det_ymin), 0)
    det_xmax = min(int(det_xmax), image_width - 1)
    det_ymax = min(int(det_ymax), image_height - 1)
    det_width = det_xmax - det_xmin + 1
    det_height = det_ymax - det_ymin + 1
    if det_width <= 0 or det_height <= 0:
        raise ValueError('The face box does not overlap the image')

    # Inclusive crop so its size equals det_width x det_height, the same
    # extent used below to map normalized landmarks back to pixels.
    det_crop = image[det_ymin:det_ymax + 1, det_xmin:det_xmax + 1, :]
    # astype() copies, so drawing on ``image`` afterwards cannot leak into
    # the network input.
    inputs = Image.fromarray(det_crop[:, :, ::-1].astype('uint8'), 'RGB')
    if not use_face_boxes_detector:
        cv2.rectangle(image, (det_xmin, det_ymin), (det_xmax, det_ymax), (0, 0, 255), 2)

    inputs = preprocess(inputs).unsqueeze(0)
    inputs = inputs.to(device)

    lms_pred_x, lms_pred_y, lms_pred_nb_x, lms_pred_nb_y, _, _ = forward_pip(net, inputs, preprocess, cfg.input_size, cfg.net_stride, cfg.num_nb)
    _, reverse_index1, reverse_index2, max_len = get_meanface(os.path.join('../data', cfg.data_name, 'meanface.txt'), cfg.num_nb)
    # Average each landmark's own prediction with the predictions made for
    # it by its neighbouring landmarks.
    tmp_nb_x = lms_pred_nb_x[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
    tmp_nb_y = lms_pred_nb_y[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
    tmp_x = torch.mean(torch.cat((lms_pred_x, tmp_nb_x), dim=1), dim=1).view(-1, 1)
    tmp_y = torch.mean(torch.cat((lms_pred_y, tmp_nb_y), dim=1), dim=1).view(-1, 1)
    lms_pred_merge = torch.cat((tmp_x, tmp_y), dim=1).flatten()
    lms_pred_merge = lms_pred_merge.cpu().numpy()

    landmarks = []
    for i in range(cfg.num_lms):
        # Predictions are relative to the crop; scale and shift them back
        # into full-image coordinates.
        x_pred = lms_pred_merge[i * 2] * det_width + det_xmin
        y_pred = lms_pred_merge[i * 2 + 1] * det_height + det_ymin
        landmarks.append([x_pred, y_pred])
        cv2.circle(image, (int(x_pred), int(y_pred)), 1, (0, 255, 0), 1)
    cv2.imshow('In face_feature function', image)
    cv2.waitKey(0)
    return np.array(landmarks)

jhb86253817 commented 2 years ago

Hi, I think it is actually normal for landmarks to be predicted outside the box, because the face itself extends outside it, and there is no restriction that prevents predictions from falling outside.

try-agaaain commented 2 years ago

I am a beginner, and thank you very much for your answer

jhb86253817 / PIPNet

The predicted landmarks appear out of face box #24