csdcorp / speech_to_text

A Flutter plugin that exposes device specific speech to text recognition capability.
BSD 3-Clause "New" or "Revised" License
348 stars 217 forks source link

When I use the speech_to_text and record packages at the same time, speech_to_text gets stopped. This issue occurs only on Android; on iOS it works fine. #483

Closed kuberanb closed 2 months ago

kuberanb commented 2 months ago

Button to enable/disable record and speech_to_text:

InkWell(
  onTap: () async {
    // FIX: the original constructed an unused `AiRepository` here; removed
    // because it was never referenced. Restore if its constructor has
    // side effects (none are visible in this snippet).
    isRecordButtonClicked = true;
    HapticFeedback.heavyImpact();
    // Toggle recording state and rebuild so the mic icon color updates.
    setState(() {
      isStartRecord = !isStartRecord;
    });
    print("isRecordButtonClicked : $isRecordButtonClicked");

    if (isStartRecord) {
      // Start speech recognition; the callback streams recognized text
      // into the message field as it arrives.
      await speechService.startListening((speechText) {
        print("speechText : $speechText");
        messageController.text = speechText;
        setState(() {});
      });

      // Raw audio recording runs only on iOS: on Android the OS cannot
      // run SpeechRecognizer and an audio recorder at the same time
      // (the subject of this issue).
      if (Platform.isIOS) {
        await audioRecordingService.startRecording();
      }

      setState(() {});
    } else {
      if (Platform.isIOS) {
        await audioRecordingService.stopRecording();
        setState(() {});
      }

      await speechService.stopListening();
      setState(() {});
    }
  },
  child: Icon(
    Icons.mic,
    // `isStartRecord` is already a bool; no `== true` comparison needed.
    color: isStartRecord ? Colors.red : kMaterialPrimaryColor,
    size: 30,
  ),
),

// speech to text code

/// Thin wrapper around the `speech_to_text` plugin: initialization,
/// locale queries, and start/stop listening with a text callback.
class SpeechToTextService {
  late stt.SpeechToText _speech;
  bool _isListening = false;
  String _text = '';

  SpeechToTextService() {
    _speech = stt.SpeechToText();
    _initSpeechRecognizer();
  }

  /// Kicks off plugin initialization. The returned future is deliberately
  /// not awaited (constructor context); [startListening] re-checks
  /// availability with its own `initialize` call, which the plugin treats
  /// as a no-op after the first success.
  void _initSpeechRecognizer() {
    _speech.initialize(
      onError: (error) => print('Speech error: $error'),
      onStatus: (status) => print('Speech status: $status'),
    );
  }

  /// The device's current UI locale.
  // NOTE(review): `ui.window` is deprecated on newer Flutter; prefer
  // `PlatformDispatcher.instance.locale` when upgrading.
  Locale get systemLocale {
    return ui.window.locale;
  }

  /// Returns the locale ids (language codes) the recognizer supports.
  ///
  /// FIX: the original computed `languageCodes` but returned the raw
  /// `LocaleName` list, leaving the mapped result dead. Callers asking for
  /// "available languages" now receive the `localeId` strings, matching
  /// the commented-out variant in the original paste.
  Future<List> getAvailableLanguages() async {
    // List of locales supported by the device's recognizer.
    final List<LocaleName> supportedLocales = await _speech.locales();
    final List<String> languageCodes =
        supportedLocales.map((e) => e.localeId).toList();
    return languageCodes;
  }

  /// Returns the recognizer's system locale, or null if unavailable.
  Future<LocaleName?> getCurrentLanguage() async {
    final LocaleName? currentLocale = await _speech.systemLocale();
    // NOTE(review): the labels below look swapped (`localeId` prints
    // `name` and vice versa); message kept byte-identical on purpose.
    print(
        "system locale/ localeId : ${currentLocale?.name ?? ""} localeName : ${currentLocale?.localeId} ");
    return currentLocale;
  }

  /// Initializes (idempotent) and starts listening; [onResult] receives
  /// each recognized-words update. No-op if already listening.
  Future startListening(Function(String) onResult) async {
    var logger = Logger();

    if (!_isListening) {
      bool available = await _speech.initialize(
        onError: (error) => print('Speech error: ${error.errorMsg}'),
        onStatus: (status) => print('Speech status: ${status.toString()}'),
        debugLogging: true,
      );

      if (available) {
        _speech.listen(
          onResult: (result) {
            // Interim results are forwarded too (the original's
            // `finalResult` filter was commented out).
            _text = result.recognizedWords;
            print("recogonized text in speechToText : $_text");
            logger.w("recogonized text in speechToText : $_text");
            onResult(_text);
          },
        );
        _isListening = true;
      }
    }
  }

  /// Stops an active listening session; no-op otherwise.
  Future stopListening() async {
    if (_isListening) {
      await _speech.stop();
      _isListening = false;
    }
  }

  /// Latest recognized words. (Reconstructed: truncated in the paste.)
  String get recognizedText => _text;
}

// record code

/// Records audio with the `record` package, normalizes the result to WAV,
/// and exposes it as a base64 `data:` URI.
class AudioRecordingService {
  String audioFilePath = "";

  // FIX: the original created a fresh `Record()` for every call
  // (hasPermission/start/isEncoderSupported/stop). Reuse one recorder so
  // `stop()` is guaranteed to act on the same object that `start()` did.
  final Record _recorder = Record();

  AudioRecordingService() {
    // Initialization logic can be added here if needed.
  }

  /// Whether [filePath] is a `.wav` file (checked by extension only, not
  /// by inspecting the file header).
  Future<bool> isWavAudio(String filePath) async {
    if (filePath.toLowerCase().endsWith('.wav')) {
      print("wav audio file");
      return true;
    } else {
      print("not a wav audio file");
      return false;
    }
  }

  /// Starts recording in WAV format after checking the mic permission.
  /// Errors are logged, not rethrown (best-effort, as in the original).
  Future startRecording() async {
    try {
      bool hasPermission = await _recorder.hasPermission();

      if (hasPermission) {
        // No explicit path: the plugin chooses a temp file; its location
        // is returned later by `stop()`.
        await _recorder.start(
          encoder: AudioEncoder.wav,
        );
        print(
            "issupported wav(format) : ${await _recorder.isEncoderSupported(AudioEncoder.wav)}");
      } else {
        print("Permission not granted for audio recording.");
      }
    } catch (e) {
      print("Error starting recording: $e");
    }
  }

  /// Stops recording, converts the file to WAV if needed, stores the path
  /// in [audioFilePath], and logs the base64 form.
  Future stopRecording() async {
    final path = await _recorder.stop();
    print("path when we get when we stop recording audio : $path");
    if (path != null) {
      if (await isWavAudio(path)) {
        print("Audio already in wav format");
        audioFilePath = path;
      } else {
        print("Audio is not in wav format");
        print("conversion of audio file to wav format started");
        // `convertToWav` is defined elsewhere in the project.
        String convertedAudioFilePath = await convertToWav(path);
        print("convertedAudioFilePath : $convertedAudioFilePath");
        audioFilePath = convertedAudioFilePath;
      }
    } else {
      showSnackbar(snackbarText: "Error Recording Audio,Try Again");
      return;
    }

    print("final audioPath before getting getFileString : $audioFilePath");
    // Handle the audioFilePath as needed (e.g., send to server, save, etc.).
    print("Audio file path: $audioFilePath");
    String fileString = await getFileString();
    print("Base64 audio string: $fileString");
  }

  /// The most recent recording's path ("" if none yet).
  String getAudioFilePath() {
    return audioFilePath;
  }

  /// Reads the recorded file and returns it as a `data:audio/wav` URI with
  /// base64-encoded bytes.
  Future getFileString() async {
    print("audioPath : $audioFilePath in getFileString function");

    // NOTE(review): File.fromUri(Uri.parse(...)) accepts both plain paths
    // and `file://` URIs; kept because iOS may hand back a URI form here —
    // TODO confirm, then simplify to File(audioFilePath).
    File file = File.fromUri(Uri.parse(audioFilePath));

    print("file exists : ${await file.exists()}");

    Uint8List bytes = await file.readAsBytes();
    String base64String = base64Encode(bytes);

    log("base64String : $base64String");
    return 'data:audio/wav;base64,$base64String';
  }
}

kuberanb commented 2 months ago

@kaladron @thandal @deJong-IT @atrope @markszente @sowens-csd

agladyshev commented 2 months ago

@kuberanb this is a limitation on Android, see this issue

kuberanb commented 2 months ago

@agladyshev Due to this limitation I tried to use the vosk_flutter package, downloading its 40 MB model, but it's giving me an empty string whenever I transcribe the audio.

`import 'dart:developer'; import 'dart:io'; import 'dart:typed_data'; import 'package:flutter/services.dart'; import 'package:vosk_flutter/vosk_flutter.dart';

/// Offline transcription via vosk_flutter: loads a bundled model and feeds
/// it raw audio bytes in chunks.
class VoskTranscriber {
  late VoskFlutterPlugin _vosk;
  Recognizer? _recognizer;

  VoskTranscriber() {
    // Fire-and-forget: callers must allow time for the model to load
    // before transcribing (it is slow on first launch).
    initializeVosk();
  }

  /// Loads the bundled model and builds a recognizer.
  ///
  /// FIX: the original `.then(...)` chain never assigned the local
  /// `_model` — `(model) => model = model` assigns the parameter to
  /// itself — so `createRecognizer(model: _model!)` always failed its null
  /// check and the recognizer silently stayed unusable, which matches the
  /// reported empty transcriptions. Rewritten with plain await.
  Future initializeVosk() async {
    const sampleRate = 16000;
    print("_initializeVosk called");
    _vosk = VoskFlutterPlugin.instance();
    try {
      final modelPath = await ModelLoader()
          .loadFromAssets('assets/models/vosk-model-small-en-us-0.15.zip');
      final model = await _vosk.createModel(modelPath);
      _recognizer = await _vosk.createRecognizer(
        model: model,
        sampleRate: sampleRate,
      );
      if (Platform.isAndroid) {
        // Mic-based speech service is only initialized on Android,
        // as in the original.
        await _vosk.initSpeechService(_recognizer!);
      }
    } catch (e) {
      // Original stashed the message in a local `_error` that was never
      // read; surface it instead.
      print("Vosk initialization error: $e");
    }
  }

  /// Feeds [audioBytes] to the recognizer in 8 KiB chunks and returns the
  /// joined recognition results (JSON strings from vosk).
  ///
  /// FIX: the original also accumulated *partial* hypotheses into the
  /// output, duplicating words; partials are now only logged.
  // NOTE(review): assumes 16 kHz mono PCM matching the recognizer's sample
  // rate; if a full WAV file is passed, its 44-byte header is fed as audio
  // too — TODO consider stripping it.
  Future transcribeAudio(Uint8List audioBytes) async {
    print("transcribeAudio function called");
    List results = [];
    int chunkSize = 8192;
    int pos = 0;

    while (pos + chunkSize < audioBytes.length) {
      // sublist on Uint8List already yields Uint8List; the original's
      // extra Uint8List.fromList copy was redundant.
      final resultReady = await _recognizer!.acceptWaveformBytes(
        audioBytes.sublist(pos, pos + chunkSize),
      );
      pos += chunkSize;

      if (resultReady) {
        String result = await _recognizer!.getResult();
        log("complete result in vosk : $result");
        results.add(result);
      } else {
        // Transient hypothesis; do not add to the final transcript.
        String result = await _recognizer!.getPartialResult();
        log("partial result in vosk : $result");
      }
    }

    // Flush the remaining tail and collect the final result.
    await _recognizer!.acceptWaveformBytes(audioBytes.sublist(pos));
    results.add(await _recognizer!.getFinalResult());

    return results.join(' ');
  }
}

agladyshev commented 2 months ago

I see that you are feeding vosk audio data directly. I haven't tried that; I just use vosk-flutter's built-in streams. From their docs:

final speechService = await vosk.initSpeechService(recognizer);
speechService.onPartial().forEach((partial) => print(partial));
speechService.onResult().forEach((result) => print(result));
await speechService.start();

Try to see if using vosk with mic access directly works for you. If it does, there is some issue with processing Uint8List audioBytes you send it.

kuberanb commented 2 months ago

@agladyshev Audio recording and the vosk_flutter mic are not working simultaneously; that's why I used the audio file from the record package directly. What's the issue with processing the Uint8List audioBytes, and how do I fix it?

agladyshev commented 2 months ago

I can tell you with confidence that you can record audio with record: ^4.4.4 and run STT with vosk_flutter: ^0.3.48 at the same time. I just tested it on a physical device and it works in production for my app.

Are you testing on a physical device? Does your app have mic permission before you initialize vosk? Did you wait until model is fully loaded? It takes a while to load on first launch.

I made a demo for you based on Record example with Vosk. You can just clone and run this project to test. There is a screen recording in the readme you can check out.

kuberanb commented 2 months ago

@agladyshev Thank you so much for your valuable help. I had tested in an emulator — that's why it was not working; I tried on a physical device and now it works fine. But why does speech_to_text with record not work, while vosk with record does? Also, is there any way to combine multiple models (such as Indian English and US English) to improve the speech-to-text accuracy? I was getting many wrong transcriptions.

agladyshev commented 2 months ago

@kuberanb This package doesn't work in this case because it uses the native Android voice recognition interface and can't change how that works. The fact that you can't get a recording and recognize speech at the same time is a limitation of Android OS.

As for using multiple models, you should ask in the vosk_flutter project; unfortunately, I don't know much about it.

kuberanb commented 2 months ago

@agladyshev ok thank you, I will ask in vosk_flutter