csdcorp / speech_to_text

A Flutter plugin that exposes device-specific speech-to-text recognition capability.
BSD 3-Clause "New" or "Revised" License
351 stars 218 forks source link

pauseFor causes onResult callback to fire multiple times #158

Closed rivetingpeppermint closed 3 years ago

rivetingpeppermint commented 3 years ago

I modified the example code to take commands and reply to them. I used the pauseFor parameter since on iOS, the listening activity won't stop until the full listenFor duration is completed. This works perfectly fine on my Android device (Galaxy Tab A, Android 9). When I tried it on iOS (iPhone 11 iOS 13 simulator and an iPad Pro running iOS 14.2), the onResult callback is fired multiple times. See the attached screenshot (Screen Shot 2020-12-01 at 09 27 24).

If I remove the pauseFor parameter, it works fine on iOS and the multiple callbacks don't happen. I can't figure out what's wrong.

Here's my full code:

import 'dart:math';

import 'package:flutter/material.dart';
import 'package:shared_preferences/shared_preferences.dart';
import 'package:speech_to_text/speech_recognition_error.dart';
import 'package:speech_to_text/speech_recognition_result.dart';
import 'package:speech_to_text/speech_to_text.dart';

/// Application entry point: mounts [MyApp] as the root widget.
void main() => runApp(MyApp());

/// Root widget of the reproduction app.
///
/// Wraps [MyHomePage] in a [MaterialApp] with a blue theme.
class MyApp extends StatelessWidget {
  @override
  Widget build(BuildContext context) {
    final theme = ThemeData(
      primarySwatch: Colors.blue,
      visualDensity: VisualDensity.adaptivePlatformDensity,
    );
    final home = MyHomePage(title: 'speech to text example');

    return MaterialApp(
      title: 'Flutter Demo',
      theme: theme,
      home: home,
    );
  }
}

/// The single screen of the app; its state lives in [_MyHomePageState].
class MyHomePage extends StatefulWidget {
  /// Text displayed in the app bar.
  final String title;

  MyHomePage({Key key, this.title}) : super(key: key);

  @override
  _MyHomePageState createState() {
    return _MyHomePageState();
  }
}

/// State for [MyHomePage].
///
/// Drives the speech recognizer, turns each final recognition result into a
/// command/response pair, and renders the conversation as a chat list.
class _MyHomePageState extends State<MyHomePage> {
  bool _hasSpeech = false;
  double level = 0.0;
  double minSoundLevel = 50000;
  double maxSoundLevel = -50000;
  String lastWords = "";
  String lastError = "";
  String lastStatus = "";
  String _currentLocaleId = "";
  final SpeechToText stt = SpeechToText();
  int listenDuration = 5;
  int pauseDuration = 5;
  int sampleRate = 44100;
  bool isCancelOnError = true;
  bool isPartialResults = false;

  SharedPreferences sp;

  /// Conversation entries, newest first (the list view is reversed).
  final chatList = <Chat>[];

  /// Whether a final result has already been answered in the current listen
  /// session.
  ///
  /// The plugin can deliver more than one final result per session on iOS
  /// when `pauseFor` is set, so only the first non-empty one is processed.
  bool _finalResultHandled = false;

  @override
  void dispose() {
    // Stop the recognizer so it does not keep running after this widget is
    // removed; any late callbacks are ignored by the `mounted` guards below.
    stt.cancel();
    super.dispose();
  }

  /// Loads preferences, resets the listen settings and initializes the
  /// recognizer, recording in [_hasSpeech] whether initialization succeeded.
  Future<void> initSpeechState() async {
    sp = await SharedPreferences.getInstance();
    if (!mounted) {
      print('not mounted');
      return;
    }
    setState(() {
      listenDuration = 5;
      isCancelOnError = true;
      isPartialResults = false;
      _currentLocaleId = 'en_US';
      pauseDuration = 5;
    });

    final hasSpeech = await stt.initialize(
        onError: errorListener, onStatus: statusListener, debugLogging: true);

    if (!mounted) return;

    setState(() {
      _hasSpeech = hasSpeech;
    });
  }

  /// Starts a new listen session using the current settings.
  Future<void> startListening() async {
    // A new session may answer one final result again.
    _finalResultHandled = false;
    if (mounted) {
      setState(() {
        lastWords = "";
        lastError = "";
      });
    }
    // Awaited so the log line below is printed only after the listen call
    // has completed.
    await stt.listen(
        onResult: resultListener,
        listenFor: Duration(seconds: listenDuration),
        pauseFor: Duration(seconds: pauseDuration),
        localeId: _currentLocaleId,
        onSoundLevelChange: soundLevelListener,
        cancelOnError: isCancelOnError,
        partialResults: isPartialResults,
        listenMode: ListenMode.confirmation,
        sampleRate: sampleRate);
    print('we\'re listening');
  }

  /// Asks the recognizer to stop the current session.
  void stopListening() {
    print("STOPPED LISTENING");
    stt.stop();
  }

  /// Cancels the current session and resets the displayed sound level.
  void cancelListening() {
    stt.cancel();
    if (!mounted) return;
    setState(() {
      level = 0.0;
    });
  }

  /// Handles a recognition result, answering the first non-empty final
  /// result of each listen session and ignoring everything else.
  void resultListener(SpeechRecognitionResult result) {
    if (!mounted || !result.finalResult || _finalResultHandled) return;

    final words = result.recognizedWords;
    // An empty final result is not a command; leave the guard open so a
    // later result carrying text can still be answered.
    if (words.trim().isEmpty) return;

    _finalResultHandled = true;
    respondToCommand(words);
  }

  /// Tracks the observed sound-level range and the latest level.
  void soundLevelListener(double level) {
    minSoundLevel = min(minSoundLevel, level);
    maxSoundLevel = max(maxSoundLevel, level);
    if (!mounted) return;
    setState(() {
      this.level = level;
    });
  }

  /// Stops listening and records the error for display.
  void errorListener(SpeechRecognitionError error) {
    stopListening();
    debugPrint('ERROR: $error');
    if (!mounted) return;
    setState(() {
      lastError = "${error.errorMsg} - ${error.permanent}";
    });
  }

  /// Records the latest recognizer status and triggers a rebuild so the
  /// action button icon follows `stt.isListening`.
  void statusListener(String status) {
    if (!mounted) return;
    setState(() {
      lastStatus = "$status";
    });
  }

  /// Initializes speech recognition and, if available, starts listening.
  Future<void> _loadInitial() async {
    await initSpeechState();
    if (_hasSpeech && stt.isAvailable) {
      await startListening();
    }
  }

  /// Appends [message] and the bot's reply to the chat, and shows a dialog
  /// when the command maps to a navigation target.
  ///
  /// Does nothing for a blank [message] or when the widget is unmounted.
  void respondToCommand(String message) {
    if (message.trim().isEmpty || !mounted) return;

    stopListening();

    var resp = _getResponse(message);
    if (!(resp['navigate'] as bool)) resp = _getOtherResponse(message);

    // Only the state mutation lives inside setState; side effects such as
    // stopping the recognizer and opening a dialog happen outside it.
    setState(() {
      lastWords = "";
      chatList.insert(0, Chat(message));
      chatList.insert(0, Chat(resp['response'] as String, bot: true));
    });

    if (resp['navigate'] as bool) {
      if (resp['main_menu'] as bool) {
        // Navigator.pushReplacementNamed(context, resp['route']);
        _showNavigationDialog("going to main menu");
      } else {
        _showNavigationDialog("going to other pages");
      }
    }
  }

  /// Shows a dismissible alert whose body is [content].
  void _showNavigationDialog(String content) {
    showDialog(
      context: context,
      builder: (BuildContext dialogContext) {
        return AlertDialog(
          title: Text("Alert Dialog title"),
          content: Text(content),
          actions: <Widget>[
            FlatButton(
              child: Text("Close"),
              onPressed: () {
                Navigator.of(dialogContext).pop();
              },
            ),
          ],
        );
      },
    );
  }

  /// Returns one of the canned "can't do that" replies, chosen at random.
  String _getCantDoResponse() {
    const cantDoResponses = [
      "I'm sorry, I don't think I can do that yet.",
      "I'm sorry, I don't think I understand.",
    ];
    return cantDoResponses[Random().nextInt(cantDoResponses.length)];
  }

  /// Builds the non-navigating reply for [words].
  ///
  /// Replies "I'm here." when the text contains a presence-check keyword,
  /// otherwise a random "can't do that" reply.
  Map<String, dynamic> _getOtherResponse(String words) {
    const presenceKeywords = [
      "listen",
      "hear",
      "understand",
      "you here",
      "you there",
      "check",
      "test",
    ];
    final lowered = words.toLowerCase();
    // Evaluated unconditionally, as before, even if it ends up unused.
    var response = _getCantDoResponse();
    if (presenceKeywords.any((keyword) => lowered.contains(keyword))) {
      response = "I'm here.";
    }

    return {
      "navigate": false,
      "main_menu": false,
      "route": '',
      "response": response
    };
  }

  /// Builds the reply for [words], flagging navigation when the text
  /// mentions a known page (currently only "home").
  Map<String, dynamic> _getResponse(String words) {
    final lowered = words.toLowerCase();
    var route = '';
    var pageName = '';
    var mainMenu = false;
    if (lowered.contains("home")) {
      route = '/home';
      mainMenu = true;
      pageName = "home";
    }

    final navigate = pageName.isNotEmpty;
    final response =
        navigate ? 'Going to $pageName...' : _getCantDoResponse();

    return {
      "navigate": navigate,
      "main_menu": mainMenu,
      "route": route,
      "response": response
    };
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(
        title: Text(widget.title),
      ),
      body: ListView.builder(
        padding: EdgeInsets.only(bottom: 10),
        itemCount: chatList.length,
        reverse: true,
        itemBuilder: (context, index) {
          final chat = chatList[index];
          final isBot = chat.bot;
          return Container(
              margin:
                  EdgeInsets.fromLTRB(isBot ? 10 : 60, 10, isBot ? 60 : 10, 0),
              decoration: BoxDecoration(
                borderRadius: BorderRadius.circular(7),
                color: isBot ? Colors.blue : Colors.grey,
              ),
              child: Align(
                  alignment:
                      isBot ? Alignment.centerLeft : Alignment.centerRight,
                  child: Column(
                      crossAxisAlignment: isBot
                          ? CrossAxisAlignment.start
                          : CrossAxisAlignment.end,
                      children: [
                        Text(chat.message,
                            style: TextStyle(fontWeight: FontWeight.normal)),
                      ])));
        },
      ),
      floatingActionButton: FloatingActionButton(
          child: Icon(stt.isListening ? Icons.stop : Icons.mic,
              color: Colors.white),
          onPressed: () async {
            if (stt.isListening) {
              stopListening();
            } else if (!_hasSpeech) {
              // First use: initialize the recognizer before listening.
              await _loadInitial();
            } else {
              await startListening();
            }
          }),
    );
  }
}

/// One entry in the conversation transcript.
class Chat {
  /// Creates an entry with the given [message]; [bot] defaults to `false`.
  Chat(this.message, {this.bot = false});

  /// Whether this entry is flagged as a bot entry.
  final bool bot;

  /// The text of the entry.
  String message;
}

I used flutter 1.17.5 and Dart 2.8.4. Here's my pubspec.yaml file:

# Package manifest for the reproduction app used in this report.
name: stt_pausefor_bug
description: A new Flutter project.

publish_to: 'none' # Keeps this private app from being published to pub.dev.
version: 1.0.0+1

environment:
  sdk: ">=2.7.0 <3.0.0"

dependencies:
  flutter:
    sdk: flutter

  cupertino_icons: ^0.1.3

  # Plugin under test; the caret constraint allows 3.0.1 and later 3.x releases.
  speech_to_text: ^3.0.1
  shared_preferences: ^0.5.6

dev_dependencies:
  flutter_test:
    sdk: flutter

flutter:

  uses-material-design: true

There aren't any errors in the logs, just the onResult callback being fired multiple times in a row.

rivetingpeppermint commented 3 years ago

I've found that if I change `partialResults: partialResults || null != pauseFor,` (speech_to_text.dart line 330, inside the `bool started = await SpeechToTextPlatform.instance.listen()` call) to just `partialResults: partialResults,`, the duplicated calls don't occur.

sowens-csd commented 3 years ago

Certainly sounds like an error in the plugin, thanks for reporting. I'll have a look at the behaviour.

sowens-csd commented 3 years ago

I think I found the issue. I'll have to think about how to resolve it but in the meantime I think you can avoid it pretty easily. Currently you have pauseFor and listenFor set to the same value. That's making the two timers expire at the same instant and causing your duplicate callbacks. Can you try the code again with pauseFor set to 2 seconds with listenFor still at 5 and see if you're still getting duplicates?

cswkim commented 3 years ago

I believe I'm experiencing the same issue in my iOS simulator (14.4) using the latest version of this library 3.1.0. I did not set listenFor and my listen call looks like: .listen(pauseFor: Duration(seconds: 2)); I'm using SpeechToTextProvider and watching .lastResult.finalResult. Most of the time I get 2 final results, sometimes 3. I can't quite detect the pattern but after I start to listen sometimes it cuts off after one word, not waiting the full 2 seconds. In those cases I get a single finalResult. Setting listenFor to a value greater than pauseFor did not address the issue for me.

UPDATE: I have two separate widget files (using riverpod to consume the SpeechToTextProvider):

a) button with an onPressed

// Start a listen session with a 2-second pauseFor.
speech.listen(pauseFor: Duration(seconds: 2));
// NOTE(review): this adds a new stream listener on every button press and the
// subscription is never cancelled, so listeners presumably accumulate. That
// would explain the print count growing with each pass; confirm, then either
// subscribe once or cancel the previous subscription first.
speech.stream.listen((event) {
  // Only final recognition events are logged.
  if (event.eventType == SpeechRecognitionEventType.finalRecognitionEvent) {
    print('stream: ${event.recognitionResult.recognizedWords}');
  }
});

b) a plain widget watching .lastResult for debugging

// Falls back to an empty string when there is no result.
String output = result?.recognizedWords ?? '';
// A missing result is treated as not final, so nothing is printed for it.
if (result?.finalResult ?? false) {
  print('words: $output, confidence: ${result.confidence}');
}

iOS Simulator v12.4 using iOS v14.4: Every time (about 10 attempts) I hit listen and speak the b) widget prints 2-3 times and the stream listener prints in increments of 2. So the 1st pass, stream prints twice. 2nd pass stream prints 4 times, 3rd pass 6 times, etc.

Physical iPhone 6s using iOS v14.4: The b) widget prints once most of the time. I can't figure out the pattern. I can try 10 times and all attempts print once. Try another 10 attempts and 7 print once and 3 print twice. The stream listener prints in increments of 1, so the first attempt it prints once, 2nd pass prints 2 times, etc.

I don't know if any of this is useful or just vague rambling.

ab36245 commented 3 years ago

(I'm just getting back to using this excellent package after a long time dragged off onto other things)

I can confirm the problem with pauseFor on the iOS Simulator.

I am using the null-safe version of the package (4.0.0-nullsafety) and I am running the example from the pub.dev page (https://pub.dev/packages/speech_to_text/versions/4.0.0-nullsafety/example). The only modifications I have made to the example code are as follows:

@@ -235,5 +235,12 @@
   void resultListener(SpeechRecognitionResult result) {
     ++resultListened;
-    print('Result listener $resultListened');
+    final now = DateTime.now().toIso8601String();
+    print('$now resultListener: final ${result.finalResult}');
+    if (result.finalResult) {
+      print('  no. alternates ${result.alternates.length}');
+      for (final a in result.alternates) {
+        print('  - ${a.recognizedWords} (${a.confidence})');
+      }
+    }
     setState(() {
       lastWords = '${result.recognizedWords} - ${result.finalResult}';
@@ -258,6 +265,6 @@

   void statusListener(String status) {
-    // print(
-    // 'Received listener status: $status, listening: ${speech.isListening}');
+    final now = DateTime.now().toIso8601String();
+    print('$now statusListener: Received listener status: $status, listening: ${speech.isListening}');
     setState(() {
       lastStatus = '$status';

In other words I print a little more information in each call to resultListener and also print each call to statusListener.

When I run in the iOS Simulator specifying iPhone 8 with iOS 14.4 I get the following output:

flutter: 2021-03-20T18:03:07.739014 statusListener: Received listener status: listening, listening: true
flutter: 2021-03-20T18:03:09.583452 resultListener: final false
flutter: 2021-03-20T18:03:09.750945 resultListener: final false
flutter: 2021-03-20T18:03:10.140940 resultListener: final false
flutter: 2021-03-20T18:03:10.371313 resultListener: final false
flutter: 2021-03-20T18:03:10.469003 resultListener: final false
flutter: 2021-03-20T18:03:17.746529 statusListener: Received listener status: notListening, listening: false
flutter: 2021-03-20T18:03:17.761650 statusListener: Received listener status: notListening, listening: false
flutter: 2021-03-20T18:03:17.847628 resultListener: final true
flutter:   no. alternates 1
flutter:   - Can you hear me (0.582)
flutter: 2021-03-20T18:03:17.910716 resultListener: final false
flutter: 2021-03-20T18:03:17.946371 resultListener: final true
flutter:   no. alternates 6
flutter:   - Can you hear me (0.616)
flutter:   - Can you hear (0.683)
flutter:   - Can you him (0.61)
flutter:   - Can you heal me (0.592)
flutter:   - Can you help (0.595)
flutter:   - Can you help me (0.592)
flutter: 2021-03-20T18:03:17.947894 statusListener: Received listener status: notListening, listening: false

This shows 5 partial results while I am speaking (final == false). It then shows two (?) calls to statusListener as pauseFor times out (pauseFor is set to 5 seconds in the example but there seems to be a gap of more like 7 seconds after the last partial result). Then I get a run of 3 calls to resultListener, the first has finalResult set to true, then the second has finalResult set to false (??) and the third has finalResult set to true again. Notice that for a final result I have printed the alternates (as I'm really interested in them). The first "final" result does not include alternates (i.e. it looks just like a partial result but with finalResult set to true). The second final result (the third call to resultListener after the pauseFor timeout happened) includes alternates and looks more like a genuine final result.

I'm guessing this isn't the expected behaviour? If it is, is there a way to distinguish between the pseudo (first) final result and the real one? I don't want to rely on knowing I'll get a sequence of final/not final/final results because that feels very fragile.

I am just getting back to this package, so I am still in the process of setting up to test on a real iOS device as well as both an Android emulator and a real Android device. I will check those as I progress...

ab36245 commented 3 years ago

I can reproduce the behaviour on a real iOS device, a 7th gen iPad running iOS 14.4.1. Here is the same output from this device (there appears to be more debug generated by the plugin itself):

[plugin] HypothesizeTranscription
[plugin] Encoded JSON result: {"alternates":[{"recognizedWords":"I'm listening you say","confidence":0.881}],"finalResult":false}
[plugin] invokeFlutter textRecognition
flutter: 2021-03-20T20:54:59.588787 resultListener: final false
69 [plugin] invokeFlutter soundLevelChange
[plugin] invokeFlutter notifyStatus
[plugin] Finished reading audio
[plugin] invokeFlutter notifyStatus
flutter: 2021-03-20T20:55:06.481517 statusListener: Received listener status: notListening, listening: false
flutter: 2021-03-20T20:55:06.482147 statusListener: Received listener status: notListening, listening: false
flutter: 2021-03-20T20:55:06.589004 resultListener: final true
flutter:   no. alternates 1
flutter:   - I'm listening you say (0.881)
[plugin] HypothesizeTranscription
[plugin] Encoded JSON result: {"alternates":[{"recognizedWords":"I'm listening you say","confidence":0.881}],"finalResult":false}
[plugin] invokeFlutter textRecognition
[plugin] FinishRecognition true
[plugin] Encoded JSON result: {"alternates":[{"recognizedWords":"I'm listening you say","confidence":0.881},{"recognizedWords":"I'm listening to you say","confidence":0.704}],"finalResult":true}
[plugin] invokeFlutter textRecognition
[plugin] FinishSuccessfully
[plugin] invokeFlutter notifyStatus
flutter: 2021-03-20T20:55:06.610140 resultListener: final false
flutter: 2021-03-20T20:55:06.611101 resultListener: final true
flutter:   no. alternates 2
flutter:   - I'm listening you say (0.881)
flutter:   - I'm listening to you say (0.704)
flutter: 2021-03-20T20:55:06.611475 statusListener: Received listener status: notListening, listening: false
sowens-csd commented 3 years ago

Thanks for posting. No, this isn't the expected result. I think there might be a problem with the way I was synthetically creating final results on iOS. I've just committed a change that provides control over that behaviour using a new finalTimeout value on the initialize method.

I'll do further tests to try to reproduce your results and see if this change resolves them. If you're curious you could test against the repo version to see if it changes the behaviour.

sowens-csd commented 3 years ago

Good news, I was able to reproduce that behaviour and this change does seem to resolve it. There is still an issue if you set the finalTimeout too low, like 100 ms, I'll try to fix that as well and then release this version.

sowens-csd commented 3 years ago

My latest commit should ensure only a single final result even if you set a short finalTimeout. That resolves all the causes of this behaviour I can think of. Let me know if you have a chance to give it a try @ab36245.

ab36245 commented 3 years ago

Thanks very much @sowens-csd. Impressive turnaround!

Looks good on the simulator so far:

flutter: 2021-03-21T12:38:03.862388 statusListener: Received listener status: listening, listening: true
flutter: 2021-03-21T12:38:05.542383 resultListener: final false
flutter: 2021-03-21T12:38:05.863257 resultListener: final false
flutter: 2021-03-21T12:38:06.061371 resultListener: final false
flutter: 2021-03-21T12:38:06.496103 resultListener: final false
flutter: 2021-03-21T12:38:06.729233 resultListener: final false
flutter: 2021-03-21T12:38:06.829229 resultListener: final false
flutter: 2021-03-21T12:38:13.886688 statusListener: Received listener status: notListening, listening: false
flutter: 2021-03-21T12:38:13.901414 statusListener: Received listener status: notListening, listening: false
flutter: 2021-03-21T12:38:14.044987 resultListener: final false
flutter: 2021-03-21T12:38:14.051626 resultListener: final true
flutter:   no. alternates 6
flutter:   - You say I'm listening (0.607)
flutter:   - Can you say I'm listening (0.524)
flutter:   - And you say I'm listening (0.524)
flutter:   - Are you say I'm listening (0.524)
flutter:   - Do you say I'm listening (0.524)
flutter:   - You say I am listening (0.454)
flutter: 2021-03-21T12:38:14.055538 statusListener: Received listener status: notListening, listening: false

Only one final result, which is just what I'm after. I'll be able to check on the iPad a little later.

Two minor questions (and happy to open separate issues if necessary):

  1. There seem to be two (identical) calls to onStatus handler about 20ms apart when the pauseFor timeout kicks in. It's not a big deal, just wondering!
  2. The time between the last partial result and the first notListening status (as the pauseFor timeout happens) is more like 7 seconds rather than the pauseFor value of 5. Is this expected? Are we looking at network latency or something else?

I'll update again when I've checked the true iOS device.

ab36245 commented 3 years ago

Checks out on true iOS device too. Thank you

sowens-csd commented 3 years ago

Excellent, thanks for checking it out. Let's create a new issue for those other two items. We can close this one as soon as I push out the next release.

ab36245 commented 3 years ago

Happy to open another issue for the other questions. The first is minor and the second I'll wait to have some more concrete data before I start bothering you with it.

sowens-csd commented 3 years ago

I've published 4.1.0-nullsafety to pub.dev with this fix. Thanks for the help troubleshooting @ab36245!

For anyone following this thread in the future you shouldn't need to take any action. This issue is resolved in the latest version by default. The finalTimeout property provides even more control but you shouldn't need to use it unless you want to completely disable guaranteed final results or want a timeout of longer than two seconds.

sowens-csd commented 3 years ago

For the second issue @ab36245, when you are testing if you could use the latest version in the repo instead of the released version that would be helpful. I opened a separate issue, #191 and I've committed a potential fix. Some more testing on that resolution would be very helpful. Thanks!

ab36245 commented 3 years ago

Will do. I should get a chance to check tomorrow