felixjunghans / google_speech

Flutter google speech
MIT License

Why Is the Transcript Result Empty? #60

Closed ShankarJK closed 3 months ago

ShankarJK commented 5 months ago

Hey devs, I've been working with the google_speech package in Flutter, using the record package to stream audio from the microphone. I have streamingRecognize working end to end, but every transcript that comes back is empty. Could someone take a look at the code and output attached below and help me figure out what's going wrong?

Code:

import 'dart:async';

import 'package:flutter/material.dart';
import 'package:google_speech/google_speech.dart';
import 'package:record/record.dart';
import 'package:rxdart/rxdart.dart';

void main() {
  runApp(const MyApp());
}

class MyApp extends StatelessWidget {
  const MyApp({Key? key}) : super(key: key);

  // This widget is the root of your application.
  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      title: 'Mic Stream Example',
      theme: ThemeData(
        primarySwatch: Colors.blue,
        visualDensity: VisualDensity.adaptivePlatformDensity,
      ),
      home: const AudioRecognize(),
    );
  }
}

class AudioRecognize extends StatefulWidget {
  const AudioRecognize({Key? key}) : super(key: key);

  @override
  State<StatefulWidget> createState() => _AudioRecognizeState();
}

class _AudioRecognizeState extends State<AudioRecognize> {
  final _recorder = AudioRecorder();

  bool recognizing = false;
  bool recognizeFinished = false;
  String text = '';
  StreamSubscription<List<int>>? _audioStreamSubscription;
  BehaviorSubject<List<int>>? _audioStream;

  @override
  void initState() {
    super.initState();
  }

  void streamingRecognize() async {
    _audioStream = BehaviorSubject<List<int>>();

    final stream = await _recorder.startStream(const RecordConfig(
      encoder: AudioEncoder.pcm16bits,
      sampleRate: 16000,
      // numChannels: 1,
    ));

    _audioStreamSubscription = stream.listen((event) {
      _audioStream!.add(event);
    });

    setState(() {
      recognizing = true;
    });
    final serviceAccount = ServiceAccount.fromString(r'''{-----my service account details ---}''');
    final speechToText = SpeechToText.viaServiceAccount(serviceAccount);
    final config = _getConfig();

    final responseStream = speechToText.streamingRecognize(
        StreamingRecognitionConfig(config: config, interimResults: true),
        _audioStream!);

    var responseText = '';

    // Note: Stream.handleError returns a new stream; calling it without using
    // the result is a no-op, so the error handler is attached via listen()'s
    // onError parameter below instead.
    responseStream.listen((data) {
      print(data);
      print("-------");
      print(data.results[0].stability);
      final currentText =
          data.results.map((e) => e.alternatives.first.transcript).join('\n');

      if (data.results.first.isFinal) {
        responseText += '\n' + currentText;
        setState(() {
          text = responseText;
          recognizeFinished = true;
        });
      } else {
        setState(() {
          text = responseText + '\n' + currentText;
          recognizeFinished = true;
        });
      }
    }, onError: (error) {
      print('Error in response stream: $error');
    }, onDone: () {
      print("done");
      setState(() {
        recognizing = false;
      });
    });
  }

  void stopRecording() async {
    await _recorder.stop();
    await _audioStreamSubscription?.cancel();
    await _audioStream?.close();
    setState(() {
      recognizing = false;
    });
  }

  RecognitionConfig _getConfig() => RecognitionConfig(
      encoding: AudioEncoding.LINEAR16,
      model: RecognitionModel.medical_conversation,
      enableAutomaticPunctuation: true,
      sampleRateHertz: 16000,
      languageCode: 'en-US');

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(
        title: const Text('Audio File Example'),
      ),
      body: Center(
        child: Column(
          mainAxisAlignment: MainAxisAlignment.spaceAround,
          children: <Widget>[
            if (recognizeFinished)
              _RecognizeContent(
                text: text,
              ),
            ElevatedButton(
              onPressed: recognizing ? stopRecording : streamingRecognize,
              child: recognizing
                  ? const Text('Stop recording')
                  : const Text('Start Streaming from mic'),
            ),
          ],
        ),
      ), // This trailing comma makes auto-formatting nicer for build methods.
    );
  }
}

class _RecognizeContent extends StatelessWidget {
  final String? text;

  const _RecognizeContent({Key? key, this.text}) : super(key: key);

  @override
  Widget build(BuildContext context) {
    return Padding(
      padding: const EdgeInsets.all(16.0),
      child: Column(
        children: <Widget>[
          const Text(
            'The text recognized by the Google Speech Api:',
          ),
          const SizedBox(
            height: 16.0,
          ),
          Text(
            text ?? '---',
            style: Theme.of(context).textTheme.bodyLarge,
          ),
        ],
      ),
    );
  }
}
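
One thing I notice re-reading my own config: numChannels: 1 is commented out in the RecordConfig, while the RecognitionConfig declares single-channel LINEAR16 at 16 kHz (the Speech API assumes mono unless told otherwise). If record falls back to its default channel count (two channels in recent versions, as far as I can tell), the API would be decoding interleaved stereo as mono, which would fit the symptom of billed time with empty transcripts. The explicit-mono variant of the startStream call above, which I still need to verify:

final stream = await _recorder.startStream(const RecordConfig(
  encoder: AudioEncoder.pcm16bits,
  sampleRate: 16000,
  numChannels: 1, // match the single channel the RecognitionConfig implies
));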

Output:

I/flutter (10604): results: {
I/flutter (10604):   alternatives: {
I/flutter (10604):   }
I/flutter (10604):   isFinal: true
I/flutter (10604):   resultEndTime: {
I/flutter (10604):     seconds: 2
I/flutter (10604):     nanos: 700000000
I/flutter (10604):   }
I/flutter (10604):   languageCode: en-us
I/flutter (10604): }
I/flutter (10604): totalBilledTime: {
I/flutter (10604):   seconds: 3
I/flutter (10604): }
I/flutter (10604): speechEventTime: {
I/flutter (10604): }
I/flutter (10604): requestId: 12345
I/flutter (10604): -------
I/flutter (10604): 0.0
I/flutter (10604): 0
I/flutter (10604): results: {
I/flutter (10604):   alternatives: {
I/flutter (10604):   }
I/flutter (10604):   isFinal: true
I/flutter (10604):   resultEndTime: {
I/flutter (10604):     seconds: 28
I/flutter (10604):     nanos: 910000000
I/flutter (10604):   }
I/flutter (10604):   languageCode: en-us
I/flutter (10604): }
I/flutter (10604): totalBilledTime: {
I/flutter (10604):   seconds: 29
I/flutter (10604): }
I/flutter (10604): speechEventTime: {
I/flutter (10604): }
I/flutter (10604): requestId: 12345
I/flutter (10604): -------
I/flutter (10604): 0.0
I/flutter (10604): 0
I/flutter (10604): results: {
I/flutter (10604):   alternatives: {
I/flutter (10604):   }
I/flutter (10604):   isFinal: true
I/flutter (10604):   resultEndTime: {
I/flutter (10604):     seconds: 75
I/flutter (10604):     nanos: 750000000
I/flutter (10604):   }
I/flutter (10604):   languageCode: en-us
I/flutter (10604): }
I/flutter (10604): totalBilledTime: {
I/flutter (10604):   seconds: 76
I/flutter (10604): }
I/flutter (10604): speechEventTime: {
I/flutter (10604): }
I/flutter (10604): requestId: 12345
I/flutter (10604): -------
I/flutter (10604): 0.0
I/flutter (10604): 0
I/flutter (10604): results: {
I/flutter (10604):   alternatives: {
I/flutter (10604):   }
I/flutter (10604):   isFinal: true
I/flutter (10604):   resultEndTime: {
I/flutter (10604):     seconds: 342
I/flutter (10604):     nanos: 970000000
I/flutter (10604):   }
I/flutter (10604):   languageCode: en-us
I/flutter (10604): }
I/flutter (10604): totalBilledTime: {
I/flutter (10604):   seconds: 343
I/flutter (10604): }
I/flutter (10604): speechEventTime: {
I/flutter (10604): }
I/flutter (10604): requestId: 12345
I/flutter (10604): -------
I/flutter (10604): 0.0
I/flutter (10604): 0
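
For context on the output above: totalBilledTime keeps growing and every result arrives with isFinal: true but an empty alternatives list, so audio is clearly reaching the API; it just isn't being recognized. Besides the channel-count question above, I also wonder whether model: RecognitionModel.medical_conversation is appropriate here, since the medical models are tuned for provider/patient dialogue; the package's general-purpose model might behave differently. To rule out the capture itself, the raw PCM can be dumped to a WAV file and played back by ear. A rough sketch of that (writeWav is a throwaway debugging helper, not part of record or google_speech, and it assumes 16-bit PCM matching the RecordConfig):

import 'dart:io';
import 'dart:typed_data';

/// Debugging helper: wraps raw 16-bit PCM bytes in a minimal WAV header
/// so the captured audio can be played back and checked by ear.
Future<void> writeWav(String path, List<int> pcm,
    {int sampleRate = 16000, int channels = 1}) async {
  const bitsPerSample = 16;
  final byteRate = sampleRate * channels * (bitsPerSample ~/ 8);
  final blockAlign = channels * (bitsPerSample ~/ 8);
  final header = BytesBuilder();
  void str(String s) => header.add(s.codeUnits);
  void u32(int v) => header
      .add(Uint8List(4)..buffer.asByteData().setUint32(0, v, Endian.little));
  void u16(int v) => header
      .add(Uint8List(2)..buffer.asByteData().setUint16(0, v, Endian.little));
  str('RIFF');
  u32(36 + pcm.length); // RIFF chunk size: header remainder + data
  str('WAVE');
  str('fmt ');
  u32(16); // fmt chunk size for plain PCM
  u16(1); // audio format 1 = uncompressed PCM
  u16(channels);
  u32(sampleRate);
  u32(byteRate);
  u16(blockAlign);
  u16(bitsPerSample);
  str('data');
  u32(pcm.length);
  await File(path).writeAsBytes([...header.toBytes(), ...pcm]);
}

Buffering the bytes from the same stream that feeds _audioStream into a List<int> and writing them out after stopRecording should make it obvious whether the recording is intelligible; pure noise or sped-up/slowed-down speech would point at a channel-count or sample-rate mismatch rather than at the API.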