yasirkula / UnitySpeechToText

A native Unity plugin to convert speech to text on Android & iOS
MIT License
78 stars 12 forks source link

unity ios #10

Open zhuyuqi1997 opened 2 weeks ago

zhuyuqi1997 commented 2 weeks ago

The first time, the speech is recognized successfully and the result is returned; the second time, the recognition fails.

yasirkula commented 2 weeks ago

May I see your relevant code and know the device specifications?

zhuyuqi1997 commented 2 weeks ago

using Assets.ScriptsHot.Framework.Common; using CommonUI; using FairyGUI; using Spine; using UnityEngine;

/// <summary>
/// UI panel with a start button and a stop button that drive a speech-to-text session
/// and a text field (<c>ui.tfContent</c>) that shows status messages and recognized text.
/// </summary>
public class ExpectUI : BaseUI, IBaseUIUpdate
{
    #region System parameters

    /// <summary>
    /// Forwarded to <c>SpeechToText.IsServiceAvailable</c> and <c>SpeechToText.Start</c>.
    /// </summary>
    public bool PreferOfflineRecognition = false;

    public ExpectUI(string name) : base(name) { }

    protected override string uiLayer => UILayerConsts.Home; // Main UI layer
    protected override bool isFullScreen => true;

    private LearnPathController _learnPathController => GetController<LearnPathController>(ModelConsts.LearnPath);

    #endregion

    #region System functions

    /// <summary>
    /// Wires the start/stop buttons to their click handlers.
    /// </summary>
    /// <param name="uiCom">The UI component supplied by the base class (not used here).</param>
    protected override void OnInit(GComponent uiCom)
    {
        AddUIEvent(ui.btnstart.onClick, btnstartClick);
        AddUIEvent(ui.btnstop.onClick, btnstopClick);
    }

    /// <summary>
    /// Refreshes which button can be pressed: "start" only while the service is available and
    /// no session is running, "stop" only while a session is running.
    /// </summary>
    /// <param name="interval">Update interval supplied by the caller (not used here).</param>
    public void Update(int interval)
    {
        ui.btnstart.touchable = SpeechToText.IsServiceAvailable(PreferOfflineRecognition) && !SpeechToText.IsBusy();
        ui.btnstop.touchable = SpeechToText.IsBusy();

        // You may also apply some noise to the voice level for a more fluid animation (e.g. via Mathf.PerlinNoise)
        //VoiceLevelSlider.value = Mathf.Lerp(VoiceLevelSlider.value, normalizedVoiceLevel, 15f * Time.unscaledDeltaTime);
    }

    #endregion

    #region Button Event

    /// <summary>
    /// Requests the speech recognition permission and, if it is granted, starts a session that
    /// reports to <c>SpeechToTextManger.instance</c>. The outcome is written to <c>ui.tfContent</c>.
    /// </summary>
    private void btnstartClick()
    {
        SpeechToText.RequestPermissionAsync((permission) =>
        {
            if (permission == SpeechToText.Permission.Granted)
            {
                VFDebug.Log("PreferOfflineRecognition" + PreferOfflineRecognition);
                if (SpeechToText.Start(SpeechToTextManger.instance, preferOfflineRecognition: PreferOfflineRecognition))
                {
                    VFDebug.Log("1111111");
                    // Clear the previous text so the new session starts from an empty field.
                    ui.tfContent.text = "";
                }
                else
                {
                    VFDebug.Log("222222");
                    ui.tfContent.text = "Couldn't start speech recognition session!";
                }
            }
            else
            {
                ui.tfContent.text = "Permission is denied!";
            }
        });
    }

    /// <summary>
    /// Force-stops the current speech recognition session and hides this panel.
    /// </summary>
    private void btnstopClick()
    {
        SpeechToText.ForceStop();
        Hide();
    }

    /// <summary>
    /// Shows <paramref name="text"/> in the content text field.
    /// </summary>
    /// <param name="text">The text to display.</param>
    public void SetText(string text)
    {
        ui.tfContent.text = text;
    }

    #endregion

    #region Other

    #endregion
}

zhuyuqi1997 commented 2 weeks ago

namespace Assets.ScriptsHot.Framework.Common
{
    /// <summary>
    /// Listener that receives the SpeechToText callbacks, logs them and forwards partial and
    /// final recognition text to the <c>LearnPathController</c>.
    /// </summary>
    // NOTE(review): the base type is written as non-generic "MonoSingleton" here, but callers pass
    // "SpeechToTextManger.instance" as an ISpeechToTextListener, so the original declaration was
    // probably "MonoSingleton<SpeechToTextManger>" with the type argument lost in the paste - confirm.
    internal class SpeechToTextManger : MonoSingleton, ISpeechToTextListener
    {
        public bool PreferOfflineRecognition;

        // Latest voice level reported by OnVoiceLevelChanged; reset to 0 when a result arrives.
        private float normalizedVoiceLevel;

        /// <summary>
        /// Initializes the speech recognizer with the "en-US" language and logs a message if
        /// the initialization is reported as failed instead of silently ignoring the result.
        /// </summary>
        public void Initialize()
        {
            if (!SpeechToText.Initialize("en-US"))
            {
                VFDebug.Log("Couldn't initialize with language: en-US");
            }
        }

        /// <summary>
        /// Currently a no-op: the re-initialization code below is commented out.
        /// </summary>
        /// <param name="preferredLanguage">Requested language code (currently ignored).</param>
        public void ChangeLanguage(string preferredLanguage)
        {
            //if (!SpeechToText.Initialize(preferredLanguage))
            //    SpeechText.text = "Couldn't initialize with language: " + preferredLanguage;
        }

        /// <summary>
        /// Currently a no-op.
        /// </summary>
        public void StartSpeechToText()
        {
        }

        /// <summary>
        /// Force-stops the current speech recognition session.
        /// </summary>
        public void StopSpeechToText()
        {
            SpeechToText.ForceStop();
        }

        void ISpeechToTextListener.OnReadyForSpeech()
        {
            VFDebug.Log("OnReadyForSpeech");
        }

        void ISpeechToTextListener.OnBeginningOfSpeech()
        {
            VFDebug.Log("OnBeginningOfSpeech");
        }

        void ISpeechToTextListener.OnVoiceLevelChanged(float normalizedVoiceLevel)
        {
            // Note that On Android, voice detection starts with a beep sound and it can trigger this callback. You may want to ignore this callback for ~0.5s on Android.
            this.normalizedVoiceLevel = normalizedVoiceLevel;
        }

        void ISpeechToTextListener.OnPartialResultReceived(string spokenText)
        {
            VFDebug.Log("OnPartialResultReceived: " + spokenText);
            GameEntry.LoginC.GetController<LearnPathController>(ModelConsts.LearnPath).SetText(spokenText);
            //SpeechText.text = spokenText;
        }

        void ISpeechToTextListener.OnResultReceived(string spokenText, int? errorCode)
        {
            VFDebug.Log("OnResultReceived: " + spokenText + (errorCode.HasValue ? (" --- Error: " + errorCode) : ""));
            GameEntry.LoginC.GetController<LearnPathController>(ModelConsts.LearnPath).SetText(spokenText);
            normalizedVoiceLevel = 0f;

            // Recommended approach:
            // - If errorCode is 0, session was aborted via SpeechToText.Cancel. Handle the case appropriately.
            // - If errorCode is 9, notify the user that they must grant Microphone permission to the Google app and call SpeechToText.OpenGoogleAppSettings.
            // - If the speech session took shorter than 1 seconds (should be an error) or a null/empty spokenText is returned, prompt the user to try again (note that if
            //   errorCode is 6, then the user hasn't spoken and the session has timed out as expected).
        }
    }
}

zhuyuqi1997 commented 2 weeks ago

iPhone 13

yasirkula commented 2 weeks ago

Could you share the VFDebug logs for the first session and the second session?

zhuyuqi1997 commented 2 weeks ago

20241106-194935

zhuyuqi1997 commented 2 weeks ago

img_v3_02gc_660a60dc-34ce-40b9-bd93-e5aa8efe214g

yasirkula commented 2 weeks ago

Does nothing really show up after the 1111111 log? I was expecting a log with an errorCode. In addition, can you also check if OnVoiceLevelChanged is invoked continuously during the buggy session?

zhuyuqi1997 commented 1 week ago

Yes, there are no more logs.

zhuyuqi1997 commented 1 week ago

How can I check whether OnVoiceLevelChanged is called continuously?

yasirkula commented 1 week ago

You can either add a Debug.Log or, if you don't want to see many logs, create a Cube in front of the camera and change its Scale in OnVoiceLevelChanged.

zhuyuqi1997 commented 1 week ago

OnVoiceLevelChanged is not invoked continuously during the buggy session

yasirkula commented 1 week ago

I'd like to debug the native code. Since I haven't encountered this issue, I'll have to rely on your help. First, I'd recommend checking the logs in Xcode Console because if there's a native error that I've overlooked, it may show up there. If there are no such errors, then open SpeechToText.mm inside the built Xcode project and put NSLog lines here and there to try pinpointing where the logs stop appearing (example NSLog usage). You should put logs specifically inside this function and this function.