emilianavt / OpenSeeFace

Robust realtime face and facial landmark tracking on CPU with Unity integration
BSD 2-Clause "Simplified" License

landmark model without detection model #5

Closed marzi9696 closed 4 years ago

marzi9696 commented 4 years ago

Hi! Thanks for answering the last issue I posted, and thanks again for this great repo. I have mostly figured out the code and tried to customize it for my own use case. The only problem is that the frames I want to use are already face crops, so I don't want to use the detection model. I tested this approach, and it decreased the model's accuracy. My question is: exactly what preprocessing has to be done on the detected face frames before feeding them to the landmark detector? Or what could be the reason for the drop in accuracy after deleting the detection part?

```python
def predict(self, frame, additional_faces=[]):
    self.frame_count += 1
    start = time.perf_counter()
    im = frame

    duration_fd = 0.0
    duration_pp = 0.0
    duration_model = 0.0
    duration_pnp = 0.0

    #new_faces = []
    #new_faces.extend(self.faces)
    bonus_cutoff = len(self.faces)
    #new_faces.extend(additional_faces)
    #self.wait_count += 1
    #if self.detected == 0:
    #    start_fd = time.perf_counter()
    #    if self.use_retinaface > 0:
    #        retinaface_detections = self.retinaface.detect_retina(frame)
    #        new_faces.extend(retinaface_detections)
    #    else:
    #        new_faces.extend(self.detect_faces(frame))
    #    duration_fd = 1000 * (time.perf_counter() - start_fd)
    #    self.wait_count = 0
    #elif self.detected < self.max_faces:
    #    if self.use_retinaface > 0:
    #        new_faces.extend(self.retinaface_scan.get_results())
    #    if self.wait_count >= self.scan_every:
    #        if self.use_retinaface > 0:
    #            self.retinaface_scan.background_detect(frame)
    #        else:
    #            start_fd = time.perf_counter()
    #            new_faces.extend(self.detect_faces(frame))
    #            duration_fd = 1000 * (time.perf_counter() - start_fd)
    #            self.wait_count = 0
    #else:
    #    self.wait_count = 0

    #if len(new_faces) < 1:
    #    duration = (time.perf_counter() - start) * 1000
    #    if not self.silent:
    #        print(f"Took {duration:.2f}ms")
    #    return []

    crops = []
    crop_info = []
    num_crops = 1
    #for j, (x,y,w,h) in enumerate((0,0,self.width,self.height)):
    #SET THE BOUNDING BOX TO THE WHOLE FRAME BECAUSE IT'S ALREADY A FRAME OF A FACE
    (x,y,w,h) = (0,0,self.width,self.height)
    #(crop_x1,crop_y1,crop_x2,crop_y2) = (0,0,self.width,self.height)
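    # These 10%/12.5% margins get clamped away below when the box is already
    # the whole frame, so they add no real padding in this case.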
    crop_x1 = x - int(w * 0.1)
    crop_y1 = y - int(h * 0.125)
    crop_x2 = x + w + int(w * 0.1)
    crop_y2 = y + h + int(h * 0.125)

    crop_x1, crop_y1 = clamp_to_im((crop_x1, crop_y1), self.width, self.height)
    crop_x2, crop_y2 = clamp_to_im((crop_x2, crop_y2), self.width, self.height)

    scale_x = float(crop_x2 - crop_x1) / self.res
    scale_y = float(crop_y2 - crop_y1) / self.res

    start_pp = time.perf_counter()
    cv2.imwrite('marzi'+str(start_pp)+'.jpg',im[crop_y1:crop_y2, crop_x1:crop_x2])
    #bounding_box = (0,0,self.width,self.height)
    crop = self.preprocess(im, (crop_x1, crop_y1, crop_x2,crop_y2))
    #crop = self.preprocess(im, bounding_box)
    duration_pp += 1000 * (time.perf_counter() - start_pp)
    crops.append(crop)
    #crop_info.append((crop_x1, crop_y1, scale_x, scale_y, 0.0 if j >= bonus_cutoff else 0.1))
    crop_info.append((crop_x1, crop_y1, scale_x, scale_y,1))

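    # Run the landmark model. num_crops is fixed to 1 above, so the threaded
    # multi-crop path in the else branch is never taken.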
    start_model = time.perf_counter()
    outputs = {}
    if num_crops == 1:
        output = self.session.run([], {self.input_name: crops[0]})[0]
        conf, lms = self.landmarks(output[0], crop_info[0])
        print(conf)
        if conf > self.threshold:
            try:
                eye_state = self.get_eye_state(frame, lms)
            except:
                eye_state = [(1.0, 0.0, 0.0, 0.0), (1.0, 0.0, 0.0, 0.0)]
            outputs[crop_info[0]] = (conf, (lms, eye_state), 0)
    else:
        started = 0
        results = queue.Queue()
        for i in range(min(num_crops, self.max_workers)):
            thread = threading.Thread(target=worker_thread, args=(self.sessions[started], frame, crops[started], crop_info[started], results, self.input_name, started, self))
            started += 1
            thread.start()
        returned = 0
        while returned < num_crops:
            result = results.get(True)
            if len(result) != 1:
                session, conf, lms, sample_crop_info, idx = result
                outputs[sample_crop_info] = (conf, lms, idx)
            else:
                session = result[0]
            returned += 1
            if started < num_crops:
                thread = threading.Thread(target=worker_thread, args=(session, frame, crops[started], crop_info[started], results, self.input_name, started, self))
                started += 1
                thread.start()

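    # Turn the predicted landmarks into bounding boxes and group overlapping
    # detections.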
    actual_faces = []
    good_crops = []
    for crop in crop_info:
        if crop not in outputs:
            continue
        conf, lms, i = outputs[crop]
        x1, y1, _ = lms[0].min(0)
        x2, y2, _ = lms[0].max(0)
        bb = (x1, y1, x2 - x1, y2 - y1)
        outputs[crop] = (conf, lms, i, bb)
        actual_faces.append(bb)
        good_crops.append(crop)
    groups = group_rects(actual_faces)

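    # Keep only the best result per group, ranked by confidence plus the
    # crop bonus value.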
    best_results = {}
    for crop in good_crops:
        conf, lms, i, bb = outputs[crop]
        if conf < self.threshold:
            continue
        group_id = groups[str(bb)][0]
        if not group_id in best_results:
            best_results[group_id] = [-1, [], 0]
        if conf > self.threshold and best_results[group_id][0] < conf + crop[4]:
            best_results[group_id][0] = conf + crop[4]
            best_results[group_id][1] = lms
            best_results[group_id][2] = crop[4]

    sorted_results = sorted(best_results.values(), key=lambda x: x[0], reverse=True)[:self.max_faces]
    self.assign_face_info(sorted_results)
    duration_model = 1000 * (time.perf_counter() - start_model)

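    # Estimate 3D pose (PnP) and 3D landmark points for each confident face.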
    results = []
    detected = []
    start_pnp = time.perf_counter()
    for face_info in self.face_info:
        if face_info.alive and face_info.conf > self.threshold:
            face_info.success, face_info.quaternion, face_info.euler, face_info.pnp_error, face_info.pts_3d, face_info.lms = self.estimate_depth(face_info)
            face_info.adjust_3d()
            lms = face_info.lms[:, 0:2]
            x1, y1 = tuple(lms[0:66].min(0))
            x2, y2 = tuple(lms[0:66].max(0))
            bbox = (y1, x1, y2 - y1, x2 - x1)
            face_info.bbox = bbox
            detected.append(bbox)
            results.append(face_info)
    duration_pnp += 1000 * (time.perf_counter() - start_pnp)

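    # Update the tracked boxes. If nothing passed the threshold, optionally
    # grow the previous boxes for a few frames before discarding them.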
    if len(detected) > 0:
        self.detected = len(detected)
        self.faces = detected
        self.discard = 0
    else:
        self.detected = 0
        self.discard += 1
        if self.discard > self.discard_after:
            self.faces = []
        else:
            if self.bbox_growth > 0:
                faces = []
                for (x,y,w,h) in self.faces:
                    x -= w * self.bbox_growth
                    y -= h * self.bbox_growth
                    w += 2 * w * self.bbox_growth
                    h += 2 * h * self.bbox_growth
                    faces.append((x,y,w,h))
                self.faces = faces
    self.faces = [x for x in self.faces if not np.isnan(np.array(x)).any()]
    self.detected = len(self.faces)

    duration = (time.perf_counter() - start) * 1000
    if not self.silent:
        print(f"Took {duration:.2f}ms (detect: {duration_fd:.2f}ms, crop: {duration_pp:.2f}ms, track: {duration_model:.2f}ms, 3D points: {duration_pnp:.2f}ms)")

    results = sorted(results, key=lambda x: x.id)

    return results
```

Thank you :)

emilianavt commented 4 years ago

If you already have a tight face bounding box, you can set it directly on the Tracker object:

```python
tracker.faces = [(x, y, w, h)]
tracker.detected = 1
```

If `max_faces` is set to 1, this should skip the detection model. If the bounding boxes are not tight but a bit looser, it might be good to reduce the factors in the four crop margin lines (the `0.1` and `0.125` factors in your code above) to zero, as they are intended to leave a bit of space around the face.
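For reference, a minimal sketch of those four lines with the factors zeroed (variable names as in your snippet):

```python
# With the margin factors reduced to zero, the crop is just the box itself:
crop_x1 = x
crop_y1 = y
crop_x2 = x + w
crop_y2 = y + h
```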

Looking at your code, you still have those margin lines, but since you are running the model on just the face, they probably won't do anything, because there is nothing left in the frame to use as margin. Perhaps your face crops are too tight for the model? The model is used to faces with, on average, a 10% margin on all sides around where the landmarks would be. Other than that, I don't really see anything that should lower accuracy. One small fix: in the line `crop_info.append((crop_x1, crop_y1, scale_x, scale_y,1))`, the `1` at the end should be `0.1`.
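If it is easier to adjust the crops on your side instead, something like this sketch could add that margin back before tracking (`pad_face_crop` is a hypothetical helper, not part of this repo, and border replication is just one possible padding choice):

```python
import cv2

def pad_face_crop(face_img, margin=0.10):
    # Hypothetical helper: add ~10% margin on each side of a tight face crop.
    # Replicating the border pixels is an assumption; cropping a larger region
    # from the original source frame would likely give better results.
    h, w = face_img.shape[:2]
    pad_x = int(w * margin)
    pad_y = int(h * margin)
    return cv2.copyMakeBorder(face_img, pad_y, pad_y, pad_x, pad_x,
                              cv2.BORDER_REPLICATE)
```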

marzi9696 commented 4 years ago

Thank you so much. Yeah, I guessed the same thing, that the faces are too tight: I saved both kinds of frames, the ones your model detects and the ones I already had, and mine were tighter than yours. Thank you so much for answering my questions ❤