speechmatics / speechmatics-js-sdk

Javascript and Typescript SDK for Speechmatics
MIT License
39 stars 6 forks source link

Error trying to use speechmatics from my browser #26

Closed Success-akinyemi closed 10 months ago

Success-akinyemi commented 11 months ago

I tried to use the real-time-speech-to-text api from my browser using react it is not working i keep getting this error: { "message": "Error", "type": "protocol_error", "reason": "Unsupported Data" }

i have generated jwt secret using my api key i don't know what the error is for exactly and how i can resolve it. any assistance thanks

nickgerig commented 11 months ago

Hi @Success-akinyemi

Do you have some code we can look at?

Success-akinyemi commented 11 months ago

import './App.css'; import { Component } from 'react'; import { RealtimeSession } from 'speechmatics';

class App extends Component { constructor(props) { super(props); this.state = { transcript: '', recording: false, };

this.apikey = import.meta.env.VITE_API_KEY;
this.realtimeSession = new RealtimeSession({ apiKey: this.apikey });
console.log(this.apikey)

}

startRecording = () => { this.realtimeSession .start({ transcription_config: { language: 'en', operating_point: 'enhanced', enable_partials: 'true', }, audio_format: { type: 'raw' }, }) .then(() => { this.setState({ recording: true }); this.setupMediaRecorder(); }) .catch((error) => { console.log('ERROR STARTING THE SESSION:', error); }); };

stopRecording = () => { this.mediaRecorder.stop(); this.setState({ recording: false }); };

onData = (audioData) => { if (this.state.recording) { this.mediaRecorder.start(); } };

setupMediaRecorder = async () => { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus', audioBitsPerSecond: 16000, });

this.mediaRecorder.ondataavailable = (event) => {
  if (event.data.size > 0) {
    this.realtimeSession.sendAudio(event.data);
  }
};

};

componentDidMount() { this.realtimeSession.addListener('AddTranscript', (message) => { this.setState((prevState) => ({ transcript: prevState.transcript + message.metadata.transcript + ' ' })); });

this.realtimeSession.addListener('EndOfTranscript', () => {
  this.setState((prevState) => ({ transcript: prevState.transcript + '\n' }));
});

}

render() { return (

Live Transcription

{this.state.recording ? ( ) : ( )}
);

} }

export default App;

that is the full code @nickgerig

mnemitz commented 11 months ago

@Success-akinyemi I haven't tried running your code yet, but based on the error I suspect the error is to do with audio encoding.

When you call start(), you've provided this argument, specifying raw as the audio type:

this.realtimeSession.start({
    transcription_config: {
        language: 'en',
        operating_point: 'enhanced',
        enable_partials: 'true',
    },
    audio_format: { type: 'raw' },
})

Then when initializing MediaRecorder, you've specified the codec as opus:

this.mediaRecorder = new MediaRecorder(stream, {
    mimeType: 'audio/webm;codecs=opus',
    audioBitsPerSecond: 16000,
});

For raw type audio, the data supplied must be PCM, and not a lossy compressed encoding like Opus. I think if you remove the codecs=opus segment from the mimeType, the default should be correct. Let us know if you encounter any issues after that.

Otherwise, you could try setting type: 'file' instead of raw, which may also accept the compressed data.

Success-akinyemi commented 11 months ago

import './App.css'; import { Component } from 'react'; import { RealtimeSession } from 'speechmatics';

class App extends Component { constructor(props) { super(props); this.state = { transcript: '', recording: false, };

this.apikey = import.meta.env.VITE_API_KEY;
this.realtimeSession = new RealtimeSession({ apiKey: this.apikey });
console.log(this.apikey)

}

startRecording = () => { this.realtimeSession .start({ message: 'StartRecognition', transcription_config: { language: 'en', operating_point: 'enhanced', enable_partials: true, output_locale: "en-US", diarization: "speaker", }, audio_format: { type: "raw", encoding: "pcm_f32le", sample_rate: 16000 }, }) .then(() => { this.setState({ recording: true }); this.setupMediaRecorder(); }) .catch((error) => { console.log('ERROR STARTING THE SESSION:', error); }); };

stopRecording = () => { this.mediaRecorder.stop(); this.setState({ recording: false }); };

onData = (audioData) => { if (this.state.recording) { this.mediaRecorder.start(); } };

setupMediaRecorder = async () => { const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); this.mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm', audioBitsPerSecond: 16000, });

this.mediaRecorder.ondataavailable = (event) => {
  if (event.data.size > 0) {
    this.realtimeSession.sendAudio(event.data);
  }
};

};

componentDidMount() { this.realtimeSession.addListener('AddTranscript', (message) => { this.setState((prevState) => ({ transcript: prevState.transcript + message.metadata.transcript + ' ' })); });

this.realtimeSession.addListener('EndOfTranscript', () => {
  this.setState((prevState) => ({ transcript: prevState.transcript + '\n' }));
});

}

render() { console.log('TRANS',this.state) return (

Live Transcription

{this.state.recording ? ( ) : ( )}
);

} }

export default App;

i have made changes to the code. now i don't get any error message but i cannot see any transcribed data back

Success-akinyemi commented 11 months ago

I have modified sections of my code to work but i am not getting any meaningful data back: the code: import './App.css'; import { Component } from 'react'; import { RealtimeSession } from 'speechmatics';

class App extends Component { constructor(props) { super(props); this.state = { transcript: '', recording: false, };

this.apikey = import.meta.env.VITE_API_KEY;
this.realtimeSession = new RealtimeSession({ apiKey: this.apikey });
this.mediaRecorder = null; 

}

startRecording = () => { this.realtimeSession .start({ message: 'StartRecognition', transcription_config: { language: 'en', operating_point: 'enhanced', enable_partials: true, output_locale: 'en-US', diarization: 'speaker', }, audio_format: { type: 'raw', encoding: 'pcm_f32le', sample_rate: 16000, }, }) .then(() => { this.setState({ recording: true }); this.setupMediaRecorder(); }) .catch((error) => { console.log('ERROR STARTING THE SESSION:', error); }); };

stopRecording = () => { if (this.mediaRecorder) { this.mediaRecorder.stop(); } this.realtimeSession.stop(); this.setState({ recording: false }); };

setupMediaRecorder = async () => { const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

this.mediaRecorder = new MediaRecorder(stream, {
  mimeType: 'audio/webm',
  audioBitsPerSecond: 16000,
});

this.mediaRecorder.ondataavailable = (event) => {
  if (event.data.size > 0) {
    this.realtimeSession.sendAudio(event.data);
  }
};

this.mediaRecorder.onstop = () => {
  if (this.state.recording) {
    this.realtimeSession.start();
  }
};

};

componentDidMount() { this.realtimeSession.addListener('AddTranscript', (message) => { this.setState((prevState) => ({ transcript: prevState.transcript + message.metadata.transcript + ' ' })); });

this.realtimeSession.addListener('EndOfTranscript', () => {
  this.setState((prevState) => ({ transcript: prevState.transcript + '\n' }));
});

}

render() { return (

Live Transcription

{this.state.recording ? ( ) : ( )}
{console.log('TRANS', this.state)}
);

} }

export default App;

from the last console.log i get this message in my output: { "transcript": "\n\n", "recording": false }

@mnemitz @nickgerig

nickgerig commented 11 months ago

@Success-akinyemi See below for a working version of your code, the MediaRecorder.start(500) wasn't being called and the audio_format was wrong, you can just let the SDK use the default type:'file'. Also if you set max_delay to 2 then you'll get results quicker which is easier for testing.

import './App.css';
import { Component } from 'react';
import { RealtimeSession } from 'speechmatics';

class App extends Component {
  constructor(props) {
    super(props);
    this.state = {
      transcript: '',
      recording: false,
    };

    this.apikey = import.meta.env.VITE_API_KEY;
    this.realtimeSession = new RealtimeSession({ apiKey: this.apikey });
    this.mediaRecorder = null;
  }

  startRecording = () => {
    this.realtimeSession
      .start({
        message: 'StartRecognition',
        transcription_config: {
          language: 'en',
          operating_point: 'enhanced',
          enable_partials: true,
          output_locale: 'en-US',
          diarization: 'speaker',
          max_delay: 2,
        }
      })
      .then(() => {
        this.setState({ recording: true });
        this.setupMediaRecorder();
      })
      .catch((error) => {
        console.log('ERROR STARTING THE SESSION:', error);
      });
  };

  stopRecording = () => {
    if (this.mediaRecorder) {
      this.mediaRecorder.stop();
    }
    this.realtimeSession.stop();
    this.setState({ recording: false });
  };

  setupMediaRecorder = async () => {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    this.mediaRecorder = new MediaRecorder(stream, {
      mimeType: 'audio/webm',
      audioBitsPerSecond: 16000,
    });

    this.mediaRecorder.ondataavailable = (event) => {
      console.log('DATA', event.data);
      if (event.data.size > 0) {
        this.realtimeSession.sendAudio(event.data);
      }
    };

    this.mediaRecorder.onstop = () => {
      if (this.state.recording) {
        this.realtimeSession.start();
      }
    };
    this.mediaRecorder.start(500);
  };

  componentDidMount() {
    this.realtimeSession.addListener('AddTranscript', (message) => {
      this.setState((prevState) => ({ transcript: prevState.transcript + message.metadata.transcript + ' ' }));
    });

    this.realtimeSession.addListener('EndOfTranscript', () => {
      this.setState((prevState) => ({ transcript: prevState.transcript + '\n' }));
    });
  }

  render() {
    return (
      <div className='App'>
        <div>
          {this.state.recording ? (
            <button onClick={this.stopRecording}>Stop Recording</button>
          ) : (
            <button onClick={this.startRecording}>Start Recording</button>
          )}
        </div>

        <textarea
          cols='50'
          rows='10'
          value={this.state.transcript}
          readOnly
          placeholder='Transcription Output...'
          style={{ color: 'black' }}
        ></textarea>
        {console.log('TRANS', this.state)}
      </div>
    );
  }
}

export default App;
Success-akinyemi commented 10 months ago

Thanks @nickgerig for the correction it works okay now with Live transcription from audio input from mic. I really appreciate

nickgerig commented 10 months ago

No problem, glad you got it working. We'll get some more samples into the repo soon which will make it easier to quickstart!