speechmatics / speechmatics-js-sdk

Javascript and Typescript SDK for Speechmatics
MIT License
39 stars 6 forks source link

Error trying to use speechmatics from my browser #26

Closed Success-akinyemi closed 10 months ago

Success-akinyemi commented 11 months ago

I tried to use the real-time-speech-to-text api from my browser using react it is not working i keep getting this error: { "message": "Error", "type": "protocol_error", "reason": "Unsupported Data" }

i have generated jwt secret using my api key i don't know what the error is for exactly and how i can resolve it. any assistance thanks

nickgerig commented 11 months ago

Hi @Success-akinyemi

Do you have some code we can look at?

Success-akinyemi commented 11 months ago

import './App.css'; import { Component } from 'react'; import { RealtimeSession } from 'speechmatics';

class App extends Component { constructor(props) { super(props); this.state = { transcript: '', recording: false, };

this.apikey = import.meta.env.VITE_API_KEY;
this.realtimeSession = new RealtimeSession({ apiKey: this.apikey });
console.log(this.apikey)

}

startRecording = () => { this.realtimeSession .start({ transcription_config: { language: 'en', operating_point: 'enhanced', enable_partials: 'true', }, audio_format: { type: 'raw' }, }) .then(() => { this.setState({ recording: true }); this.setupMediaRecorder(); }) .catch((error) => { console.log('ERROR STARTING THE SESSION:', error); }); };

stopRecording = () => { this.mediaRecorder.stop(); this.setState({ recording: false }); };

onData = (audioData) => { if (this.state.recording) { this.mediaRecorder.start(); } };

setupMediaRecorder = async () => { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus', audioBitsPerSecond: 16000, });

this.mediaRecorder.ondataavailable = (event) => {
  if (event.data.size > 0) {
    this.realtimeSession.sendAudio(event.data);
  }
};

};

componentDidMount() { this.realtimeSession.addListener('AddTranscript', (message) => { this.setState((prevState) => ({ transcript: prevState.transcript + message.metadata.transcript + ' ' })); });

this.realtimeSession.addListener('EndOfTranscript', () => {
  this.setState((prevState) => ({ transcript: prevState.transcript + '\n' }));
});

}

render() { return (

Live Transcription

{this.state.recording ? ( ) : ( )}
);

} }

export default App;

that is the full code @nickgerig

mnemitz commented 11 months ago

@Success-akinyemi I haven't tried running your code yet, but based on the error I suspect the error is to do with audio encoding.

When you call start(), you've provided this argument, specifying raw as the audio type:

this.realtimeSession.start({
    transcription_config: {
        language: 'en',
        operating_point: 'enhanced',
        enable_partials: 'true',
    },
    audio_format: { type: 'raw' },
})

Then when initializing MediaRecorder, you've specified the codec as opus:

this.mediaRecorder = new MediaRecorder(stream, {
    mimeType: 'audio/webm;codecs=opus',
    audioBitsPerSecond: 16000,
});

For raw type audio, the data supplied must be PCM, and not a lossy compressed encoding like Opus. I think if you remove the codecs=opus segment from the mimeType, the default should be correct. Let us know if you encounter any issues after that.

Otherwise, you could try setting type: 'file' instead of raw, which may also accept the compressed data.

Success-akinyemi commented 11 months ago

import './App.css'; import { Component } from 'react'; import { RealtimeSession } from 'speechmatics';

class App extends Component { constructor(props) { super(props); this.state = { transcript: '', recording: false, };

this.apikey = import.meta.env.VITE_API_KEY;
this.realtimeSession = new RealtimeSession({ apiKey: this.apikey });
console.log(this.apikey)

}

startRecording = () => { this.realtimeSession .start({ message: 'StartRecognition', transcription_config: { language: 'en', operating_point: 'enhanced', enable_partials: true, output_locale: "en-US", diarization: "speaker", }, audio_format: { type: "raw", encoding: "pcm_f32le", sample_rate: 16000 }, }) .then(() => { this.setState({ recording: true }); this.setupMediaRecorder(); }) .catch((error) => { console.log('ERROR STARTING THE SESSION:', error); }); };

stopRecording = () => { this.mediaRecorder.stop(); this.setState({ recording: false }); };

onData = (audioData) => { if (this.state.recording) { this.mediaRecorder.start(); } };

setupMediaRecorder = async () => { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm', audioBitsPerSecond: 16000, });

this.mediaRecorder.ondataavailable = (event) => {
  if (event.data.size > 0) {
    this.realtimeSession.sendAudio(event.data);
  }
};

};

componentDidMount() { this.realtimeSession.addListener('AddTranscript', (message) => { this.setState((prevState) => ({ transcript: prevState.transcript + message.metadata.transcript + ' ' })); });

this.realtimeSession.addListener('EndOfTranscript', () => {
  this.setState((prevState) => ({ transcript: prevState.transcript + '\n' }));
});

}

render() { console.log('TRANS',this.state) return (

Live Transcription

{this.state.recording ? ( ) : ( )}
);

} }

export default App;

i have made changes to the code. now i don't get any error message but i cannot see any transcribed data back

Success-akinyemi commented 11 months ago

I have modified sections of my code to work but i am not getting any meaningful data back: the code: import './App.css'; import { Component } from 'react'; import { RealtimeSession } from 'speechmatics';

class App extends Component { constructor(props) { super(props); this.state = { transcript: '', recording: false, };

this.apikey = import.meta.env.VITE_API_KEY;
this.realtimeSession = new RealtimeSession({ apiKey: this.apikey });
this.mediaRecorder = null; 

}

startRecording = () => { this.realtimeSession .start({ message: 'StartRecognition', transcription_config: { language: 'en', operating_point: 'enhanced', enable_partials: true, output_locale: 'en-US', diarization: 'speaker', }, audio_format: { type: 'raw', encoding: 'pcm_f32le', sample_rate: 16000, }, }) .then(() => { this.setState({ recording: true }); this.setupMediaRecorder(); }) .catch((error) => { console.log('ERROR STARTING THE SESSION:', error); }); };

stopRecording = () => { if (this.mediaRecorder) { this.mediaRecorder.stop(); } this.realtimeSession.stop(); this.setState({ recording: false }); };

setupMediaRecorder = async () => { const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

this.mediaRecorder = new MediaRecorder(stream, {
  mimeType: 'audio/webm',
  audioBitsPerSecond: 16000,
});

this.mediaRecorder.ondataavailable = (event) => {
  if (event.data.size > 0) {
    this.realtimeSession.sendAudio(event.data);
  }
};

this.mediaRecorder.onstop = () => {
  if (this.state.recording) {
    this.realtimeSession.start();
  }
};

};

componentDidMount() { this.realtimeSession.addListener('AddTranscript', (message) => { this.setState((prevState) => ({ transcript: prevState.transcript + message.metadata.transcript + ' ' })); });

this.realtimeSession.addListener('EndOfTranscript', () => {
  this.setState((prevState) => ({ transcript: prevState.transcript + '\n' }));
});

}

render() { return (

Live Transcription

{this.state.recording ? ( ) : ( )}
{console.log('TRANS', this.state)}
);

} }

export default App;

from the last console.log i get this message in my output: { "transcript": "\n\n", "recording": false }

@mnemitz @nickgerig

nickgerig commented 11 months ago

@Success-akinyemi See below for a working version of your code, the MediaRecorder.start(500) wasn't being called and the audio_format was wrong, you can just let the SDK use the default type:'file'. Also if you set max_delay to 2 then you'll get results quicker which is easier for testing.

import './App.css';
import { Component } from 'react';
import { RealtimeSession } from 'speechmatics';

class App extends Component {
  constructor(props) {
    super(props);
    this.state = {
      transcript: '',
      recording: false,
    };

    this.apikey = import.meta.env.VITE_API_KEY;
    this.realtimeSession = new RealtimeSession({ apiKey: this.apikey });
    this.mediaRecorder = null;
  }

  startRecording = () => {
    this.realtimeSession
      .start({
        message: 'StartRecognition',
        transcription_config: {
          language: 'en',
          operating_point: 'enhanced',
          enable_partials: true,
          output_locale: 'en-US',
          diarization: 'speaker',
          max_delay: 2,
        }
      })
      .then(() => {
        this.setState({ recording: true });
        this.setupMediaRecorder();
      })
      .catch((error) => {
        console.log('ERROR STARTING THE SESSION:', error);
      });
  };

  stopRecording = () => {
    if (this.mediaRecorder) {
      this.mediaRecorder.stop();
    }
    this.realtimeSession.stop();
    this.setState({ recording: false });
  };

  setupMediaRecorder = async () => {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    this.mediaRecorder = new MediaRecorder(stream, {
      mimeType: 'audio/webm',
      audioBitsPerSecond: 16000,
    });

    this.mediaRecorder.ondataavailable = (event) => {
      console.log('DATA', event.data);
      if (event.data.size > 0) {
        this.realtimeSession.sendAudio(event.data);
      }
    };

    this.mediaRecorder.onstop = () => {
      if (this.state.recording) {
        this.realtimeSession.start();
      }
    };
    this.mediaRecorder.start(500);
  };

  componentDidMount() {
    this.realtimeSession.addListener('AddTranscript', (message) => {
      this.setState((prevState) => ({ transcript: prevState.transcript + message.metadata.transcript + ' ' }));
    });

    this.realtimeSession.addListener('EndOfTranscript', () => {
      this.setState((prevState) => ({ transcript: prevState.transcript + '\n' }));
    });
  }

  render() {
    return (
      <div className='App'>
        <div>
          {this.state.recording ? (
            <button onClick={this.stopRecording}>Stop Recording</button>
          ) : (
            <button onClick={this.startRecording}>Start Recording</button>
          )}
        </div>

        <textarea
          cols='50'
          rows='10'
          value={this.state.transcript}
          readOnly
          placeholder='Transcription Output...'
          style={{ color: 'black' }}
        ></textarea>
        {console.log('TRANS', this.state)}
      </div>
    );
  }
}

export default App;
Success-akinyemi commented 10 months ago

Thanks @nickgerig for the correction it works okay now with Live transcription from audio input from mic. I really appreciate

nickgerig commented 10 months ago

No problem, glad you got it working. We'll get some more samples into the repo soon which will make it easier to quickstart!