Azure-Samples / cognitive-services-speech-sdk

Sample code for the Microsoft Cognitive Services Speech SDK
MIT License
2.85k stars 1.84k forks source link

Why can't I stop the active keyword recognition with KeywordRecognizer::StopRecognitionAsync? #2599

Open nemo-by-replace opened 3 weeks ago

nemo-by-replace commented 3 weeks ago

Describe the bug

I am using Azure Keyword Recognition in my project. I found that sometimes I cannot stop the active recognition with KeywordRecognizer::StopRecognitionAsync: KeywordRecognizer::StopRecognitionAsync().get() waits forever. It happens with these steps: (1) call keywordRecognizer.RecognizeOnceAsync(my-followme-model).get(); (2) say "follow me" so that step 1 returns; (3) repeat step 1; (4) call KeywordRecognizer::StopRecognitionAsync().get() within 500 ms after step 3.

Azure log file: https://f4x6dn8llc.feishu.cn/file/IS1cbULZro0zZjxtlLyc6RAnnFb?from=from_copylink

My code:

void KeywordRecog::keyword_recog_task(void) { spdlog::info("\033[32m{} keyword_recog_task in!\033[0m", m_debug_prefix); while (m_keyword_recog_task_is_running) { spdlog::info("\033[32m{} keyword_recog_task loop!\033[0m", m_debug_prefix); std::future<std::shared_ptr> t_fut = m_keyword_recognizer->RecognizeOnceAsync(m_keyword_recognition_model); std::shared_ptr t_result = t_fut.get(); spdlog::info("\033[32m{} get keyword \"{}\"\033[0m", m_debug_prefix, t_result->Text.c_str()); spdlog::info("Reason {}", (int)t_result->Reason);

    if (t_result->Reason == Speech::ResultReason::Canceled)
    {
        // break;
    }
    else if (t_result->Reason == Speech::ResultReason::RecognizedKeyword)
    {
        // do something
    }
}
spdlog::info("\033[32m{} keyword_recog_task out!\033[0m", m_debug_prefix);

}

bool KeywordRecog::init(bool _push_or_pull) { try { m_keyword_recognition_model = Speech::KeywordRecognitionModel::FromFile(m_modelname); m_audio_stream_format = Speech::Audio::AudioStreamFormat::GetWaveFormatPCM(16000, 16, 1); m_push_stream = Speech::Audio::PushAudioInputStream::Create(m_audio_stream_format); m_audio_config = Speech::Audio::AudioConfig::FromStreamInput(m_push_stream); m_audio_config->SetProperty(Speech::PropertyId::Speech_LogFilename, "/tmp/keyword_recog.log"); m_keyword_recognizer = Speech::KeywordRecognizer::FromConfig(m_audio_config); m_keyword_recognizer->Recognized += [prefix = this->m_debug_prefix](const Speech::KeywordRecognitionEventArgs& e) { spdlog::info("\033[32m {} evt {} text {}\033[0m", (int)e.Result->Reason, e.Result->Text.c_str(), prefix); };

    m_keyword_recognizer->Canceled += [](const Speech::SpeechRecognitionCanceledEventArgs& e)
    {
        switch (e.Reason)
        {
        case Speech::CancellationReason::EndOfStream:
            // Input stream was closed or the end of an input file was reached.
            spdlog::info("\033[31mCANCELED: EndOfStream\033[0m");
            break;

        case Speech::CancellationReason::Error:
            // NOTE: In case of an error, do not use the same recognizer for recognition anymore.
            spdlog::error("CANCELED: ErrorCode= {}", int(e.ErrorCode));
            spdlog::error("CANCELED: ErrorDetails= {}", e.ErrorDetails);
            break;
        case Speech::CancellationReason::CancelledByUser:
            spdlog::info("CANCELED: CanceledByUser");
            break;
        default:
            spdlog::info("CANCELED: Reason={}", int(e.Reason));
            break;
        }
    };        
    return true;
}
catch (const std::exception& e)
{
    spdlog::error("KeywordRecog init fail. Error: {}", e.what());
    return false;
}

}

void KeywordRecog::start(void) { spdlog::info("\033[31m{} KeywordRecog start in!\033[0m", m_debug_prefix); stop(); m_audio_push_thd = std::thread(&KeywordRecog::audio_push_task, this); jackAudioCapture::get_instance().regist_jack_capture_callback(std::bind(&KeywordRecog::audio_push_enqueue, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3), reinterpret_cast<void *>(this)); m_keyword_recog_task_is_running = true; m_keyword_recog_thd = std::thread(&KeywordRecog::keyword_recog_task, this); spdlog::info("\033[31m{} KeywordRecog start out!\033[0m", m_debug_prefix); }

// Stops keyword spotting. Does nothing (besides logging) unless the
// recognition thread is joinable. Otherwise: clears the running flag, asks the
// recognizer to stop and waits for that to complete, joins the recognition
// thread, then shuts down the audio push thread and unregisters the JACK
// capture callback.
void KeywordRecog::stop(void) { spdlog::info("\033[31m{} KeywordRecog stop in!\033[0m", m_debug_prefix); if (m_keyword_recog_thd.joinable()) { m_keyword_recog_task_is_running = false; m_keyword_recognizer->StopRecognitionAsync().get(); // normally takes about 50 ms, but sometimes blocks here forever (the reported bug)

    // keyword_recog_task leaves its loop once the flag is false and its pending get() has returned.
    m_keyword_recog_thd.join();
    // Push an empty buffer — presumably a wake-up/exit sentinel for audio_push_task; confirm against that function.
    m_audio_push_queue.enqueue(std::make_unique<std::vector<jack_default_audio_sample_t>>());
    m_audio_push_thd.join();
    // Unregister using the same key (this) that start() registered the callback with.
    jackAudioCapture::get_instance().remove_jack_capture_callback(reinterpret_cast<void *>(this));        
}
spdlog::info("\033[31m{} KeywordRecog stop out!\033[0m", m_debug_prefix);

} Version of the Cognitive Services Speech SDK SpeechSDK-1.33.0

Platform, Operating System, and Programming Language

github-actions[bot] commented 5 days ago

This item has been open without activity for 19 days. Provide a comment on status and remove "update needed" label.