Closed aadha3 closed 5 years ago
You will need to set a threshold for the feature distance, or use some other measure of confidence.
Hello,
I declared a `tracks_scores` list and I am appending `fdist_mf` to the corresponding track.
What is `fdist_mf` here?
Is it the confidence for each frame?
Please see the snippets below.
# One score list per track id, pre-allocated so a track id can be used
# directly as an index.
tracks_scores = [[] for _ in range(1000000)]

# NOTE(review): `ii`, `track`, `faces` and `fdist_mf` are defined outside this
# excerpt -- presumably by an enclosing per-track loop; confirm against the
# full script.
for ij, frame in enumerate(track[0][0].tolist()):
    score = fdist_mf[ij]
    # Per-frame record: [track id, score, then three per-frame track values].
    faces[frame].append([ii, score, track[1][0][ij], track[1][1][ij], track[1][2][ij]])
    # Keep every per-frame score of the track so it can be averaged later.
    tracks_scores[ii].append(score)
# Afterwards, draw a rectangle and a label for every face in the frame.
# Faces are visited in ascending order of their per-frame score; the first one
# whose track-average score lies strictly between 6 and 8 is drawn with colour
# (255, 0, 0), every other face with colour (0, 0, 255).
tracks_average_scores = {}  # track id -> average of that track's scores (cached)

while True:
    ret, image = cap.read()
    if ret == 0:
        # No more frames could be read.
        break

    # NOTE(review): `frame_num` is never advanced in this excerpt -- presumably
    # it is incremented elsewhere in the full script; confirm.
    first_face = True
    for face in sorted(faces[frame_num], key=itemgetter(1)):
        track_id = face[0]

        # Compute the track average once and reuse it for later frames.
        if track_id not in tracks_average_scores:
            tracks_average_scores[track_id] = numpy.average(tracks_scores[track_id])
        avg_score = tracks_average_scores[track_id]

        # face[2] is used as a half-size around the centre (face[3], face[4]).
        top_left = (int(face[3] - face[2]), int(face[4] - face[2]))
        bottom_right = (int(face[3] + face[2]), int(face[4] + face[2]))
        label = 'Track %d, L2 Dist %.3f %d' % (track_id, avg_score, len(tracks_scores[track_id]))

        # Only the first qualifying face per frame gets the highlight colour.
        if first_face and 6 < avg_score < 8:
            first_face = False
            box_colour = (255, 0, 0)
        else:
            box_colour = (0, 0, 255)

        cv2.rectangle(image, top_left, bottom_right, box_colour, 3)
        cv2.putText(image, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
For our applications, we used the fconfm variable given in SyncNetInstance.py
In run_visualise.py, I don't see where you detect the active speaker. How do you detect the active speaker when there are multiple faces?