I have written code that tries to identify a speaker from a .wav audio file using the Eagle Python SDK. However, when I run it, Eagle returns only a list of 0.0 scores, even though the enrolled voices are very different (a woman and a man) and the recording was made at 16 kHz with a professional microphone. For example, this is the output I get:
Checking for: Woman.eagle
User score for Woman.eagle: 0.0
Checking for: Man.eagle
User score for Man.eagle: 0.0
Recognized as: Woman.eagle
This is my code. Can you help me?
import pveagle
from pvrecorder import PvRecorder
import os
import wave
import numpy as np
# Picovoice AccessKey handed to every pveagle factory call in this file.
# NOTE(review): this credential is hard-coded in source; consider loading it
# from configuration and rotating the key.
access_key = (
    "NA0NuP+5Orn3NUuj8UHB6Sj1VaolSuM2qvYlQeeWbYs6epGzsACtYA=="
)
def analyseVoiceRepertories(directory="src/caches/users"):
    """Return the file names of the speaker profiles stored in *directory*.

    Args:
        directory: Folder scanned for ``.eagle`` profile files. Defaults to
            ``src/caches/users``.

    Returns:
        The names (not full paths) of every entry ending in ``.eagle``,
        sorted alphabetically. An empty list when the folder does not exist.
    """
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # Nothing has been enrolled yet: report "no profiles" instead of
        # crashing.
        return []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # result reproducible from one run to the next.
    return sorted(entry for entry in entries if entry.endswith(".eagle"))
def trainVoiceRecognition():
    """Enroll a new speaker from the microphone and save the profile.

    Prompts for a first name, feeds microphone audio to the Eagle profiler
    until it reports 100 % enrollment, then writes the exported profile to
    ``src/caches/users/<name>.eagle``.

    Returns:
        None. Progress and errors are printed to standard output.
    """
    name = input("Entrez votre prénom : ").strip().replace(" ", "")
    if not name:
        # An empty name would silently create a file called ".eagle".
        print("Le prénom ne peut pas être vide.")
        return

    try:
        eagle_profiler = pveagle.create_profiler(access_key=access_key)
    except pveagle.EagleError as e:
        print(f"Erreur lors de la création du profiler: {e}")
        return

    try:
        recorder = PvRecorder(
            device_index=0,
            frame_length=eagle_profiler.min_enroll_samples,
        )
        try:
            recorder.start()
            print("Recording started... Speak please")
            enroll_percentage = 0.0
            while enroll_percentage < 100.0:
                audio_frame = recorder.read()
                enroll_percentage, feedback = eagle_profiler.enroll(audio_frame)
                print(
                    f"Progression de l'enregistrement : "
                    f"{enroll_percentage:.2f}% - {feedback.name}"
                )
                if feedback == pveagle.EagleProfilerEnrollFeedback.QUALITY_ISSUE:
                    print("La qualité de l'enregistrement est insuffisante.")
            recorder.stop()
        finally:
            # Release the audio device even when recording or enrollment
            # raises.
            recorder.delete()

        speaker_profile = eagle_profiler.export()
        # The cache folder may not exist on a first run.
        os.makedirs("src/caches/users", exist_ok=True)
        with open(f"src/caches/users/{name}.eagle", 'wb') as f:
            f.write(speaker_profile.to_bytes())
    finally:
        # Always free the native profiler, including on failure paths.
        eagle_profiler.delete()

    print("Training finished and profile saved")
def wavFileToPCM(file_path):
    """Load a 16 kHz WAV file as single-channel, 16-bit PCM samples.

    Args:
        file_path: Path of the WAV file to read.

    Returns:
        A one-dimensional NumPy ``int16`` array holding one sample per audio
        frame. Multi-channel files contribute their first (left) channel
        only; 8-bit files are re-centred and scaled to the 16-bit range.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
        ValueError: If the sample rate is not 16000 Hz or the sample width
            is neither 1 nor 2 bytes.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Le fichier {file_path} est introuvable.")

    with wave.open(file_path, 'rb') as wav_file:
        n_channels = wav_file.getnchannels()
        sample_width = wav_file.getsampwidth()
        frame_rate = wav_file.getframerate()
        if frame_rate != 16000:
            raise ValueError(
                f"Taux d'échantillonnage non supporté: {frame_rate}, 16kHz attendu."
            )
        frames = wav_file.readframes(wav_file.getnframes())

    if sample_width == 2:
        pcm_data = np.frombuffer(frames, dtype=np.int16)
    elif sample_width == 1:
        # 8-bit WAV samples are unsigned and centred on 128. Widen to int16
        # before subtracting so the arithmetic cannot wrap around in uint8,
        # then scale up to the 16-bit amplitude range.
        unsigned = np.frombuffer(frames, dtype=np.uint8)
        pcm_data = (unsigned.astype(np.int16) - 128) * 256
    else:
        raise ValueError(f"Largeur d'échantillon non supportée: {sample_width}")

    if n_channels > 1:
        # Samples are interleaved per frame; keep only the first channel so
        # the result is a genuine mono stream rather than mixed-up channels.
        pcm_data = pcm_data[::n_channels]
    return pcm_data
def VoiceRecognition():
    """Identify which enrolled speaker best matches ``request.wav``.

    Every profile returned by ``analyseVoiceRepertories()`` is scored
    against ``src/caches/request.wav``: the audio is fed to an Eagle
    recognizer frame by frame and the per-frame scores are averaged.

    Returns:
        The file name of the profile with the highest average score, or
        ``None`` when the request file is missing, no profile could be
        scored, or no profile obtained a score above zero.
    """
    Users = analyseVoiceRepertories()
    if not os.path.exists("src/caches/request.wav"):
        print("Le fichier request.wav est introuvable.")
        return
    pcm_data = wavFileToPCM("src/caches/request.wav")
    print(pcm_data)

    # Keep each score paired with its profile name: a profile whose
    # recognizer cannot be created is skipped, so positions in a bare score
    # list would no longer line up with positions in Users.
    results = []
    for user in Users:
        print(f"Checking for: {user}")
        with open(f"src/caches/users/{user}", 'rb') as f:
            speakerProfile = pveagle.EagleProfile.from_bytes(f.read())
        try:
            eagle = pveagle.create_recognizer(access_key, speakerProfile)
        except pveagle.EagleError as e:
            print(f"Erreur lors de la reconnaissance de la voix : {e}")
            continue

        frame_scores = []
        try:
            frame_length = eagle.frame_length
            for i in range(0, len(pcm_data), frame_length):
                frame = pcm_data[i:i + frame_length]
                if len(frame) != frame_length:
                    print(f"Frame incomplete at index {i}, expected {frame_length}, got {len(frame)}")
                    continue
                # One profile per recognizer, so its score is at index 0.
                frame_scores.append(eagle.process(frame)[0])
        finally:
            # Free the native recognizer even if processing raises.
            eagle.delete()

        average_score = sum(frame_scores) / len(frame_scores) if frame_scores else 0
        print(f"User score for {user}: {average_score}")
        results.append((user, average_score))

    if not results:
        print("Aucun utilisateur reconnu.")
        return None

    recognized_user, best_score = max(results, key=lambda pair: pair[1])
    if best_score <= 0:
        # Every profile scored zero: nobody matched, so do not report the
        # first profile as a match just because it comes first.
        print("Aucun utilisateur reconnu.")
        return None

    print(f"Recognized as: {recognized_user}")
    return recognized_user
if __name__ == "__main__":
    # Recognition needs at least one enrolled profile; call
    # trainVoiceRecognition() beforehand to create one.
    VoiceRecognition()
I have written code that tries to identify a speaker from a .wav audio file using the Eagle Python SDK. However, when I run it, Eagle returns only a list of 0.0 scores, even though the enrolled voices are very different (a woman and a man) and the recording was made at 16 kHz with a professional microphone. For example, this is the output I get:
This is my code. Can you help me?