Open goxr3plus opened 6 years ago
Here is working code for the official Google Cloud Speech library.
If you have any problems setting the credentials, check this Stack Overflow question I asked:
For some reason it has the same problem as this library: it stops after 65 seconds. Google has made it work this way; I am going to find a workaround soon.
Check this -> https://github.com/GoogleCloudPlatform/google-cloud-java/issues/3188
package googleSpeech;
import java.io.IOException;
import java.sql.Date;
import java.time.LocalDate;
import java.util.Arrays;
import java.util.HashMap;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.Line;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.Mixer;
import javax.sound.sampled.TargetDataLine;
import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
import com.google.auth.oauth2.AccessToken;
import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
import com.google.protobuf.ByteString;
public class GoogleSpeechTest {
public GoogleSpeechTest() {
//Set credentials?
// GoogleCredentials credentials = GoogleCredentials.create(new AccessToken("AIzaSyCtrBlhBiqNd7kI4BiOn2kWiCYlwp1azVM",Date.valueOf(LocalDate.now())));
// System.out.print(credentials.getAccessToken());
//Target data line
TargetDataLine microphone;
AudioInputStream audio = null;
//Check if Microphone is Supported
checkMicrophoneAvailability();
//Print available mixers
//printAvailableMixers();
//Capture Microphone Audio Data
try {
// Signed PCM AudioFormat with 16kHz, 16 bit sample size, mono
AudioFormat format = new AudioFormat(16000, 16, 1, true, false);
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
//Check if Microphone is Supported
if (!AudioSystem.isLineSupported(info)) {
System.out.println("Microphone is not available");
System.exit(0);
}
//Get the target data line
microphone = (TargetDataLine) AudioSystem.getLine(info);
microphone.open(format);
microphone.start();
//Audio Input Stream
audio = new AudioInputStream(microphone);
} catch (Exception ex) {
ex.printStackTrace();
}
//Send audio from Microphone to Google Servers and return Text
try (SpeechClient client = SpeechClient.create()) {
ResponseObserver<StreamingRecognizeResponse> responseObserver = new ResponseObserver<StreamingRecognizeResponse>() {
public void onStart(StreamController controller) {
System.out.println("Started....");
}
public void onResponse(StreamingRecognizeResponse response) {
System.out.println(response.getResults(0));
}
public void onComplete() {
System.out.println("Complete");
}
public void onError(Throwable t) {
System.err.println(t);
}
};
ClientStream<StreamingRecognizeRequest> clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);
RecognitionConfig recConfig = RecognitionConfig.newBuilder().setEncoding(RecognitionConfig.AudioEncoding.LINEAR16).setLanguageCode("en-US").setSampleRateHertz(16000)
.build();
StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder().setStreamingConfig(config).build(); // The first request in a streaming call has to be a config
clientStream.send(request);
//Infinity loop from microphone
while (true) {
byte[] data = new byte[10];
try {
audio.read(data);
} catch (IOException e) {
System.out.println(e);
}
request = StreamingRecognizeRequest.newBuilder().setAudioContent(ByteString.copyFrom(data)).build();
clientStream.send(request);
}
} catch (Exception e) {
System.out.println(e);
}
}
/**
* Checks if the Microphone is available
*/
public static void checkMicrophoneAvailability() {
enumerateMicrophones().forEach((string , info) -> {
System.out.println("Name :" + string);
});
}
/**
* Generates a hashmap to simplify the microphone selection process. The keyset is the name of the audio device's Mixer The value is the first
* lineInfo from that Mixer.
*
* @author Aaron Gokaslan (Skylion)
* @return The generated hashmap
*/
public static HashMap<String,Line.Info> enumerateMicrophones() {
HashMap<String,Line.Info> out = new HashMap<String,Line.Info>();
Mixer.Info[] mixerInfos = AudioSystem.getMixerInfo();
for (Mixer.Info info : mixerInfos) {
Mixer m = AudioSystem.getMixer(info);
Line.Info[] lineInfos = m.getTargetLineInfo();
if (lineInfos.length >= 1 && lineInfos[0].getLineClass().equals(TargetDataLine.class))//Only adds to hashmap if it is audio input device
out.put(info.getName(), lineInfos[0]);//Please enjoy my pun
}
return out;
}
/**
* Print available mixers
*/
public void printAvailableMixers() {
//Get available Mixers
Mixer.Info[] mixerInfos = AudioSystem.getMixerInfo();
//Print available Mixers
Arrays.asList(mixerInfos).forEach(info -> {
System.err.println("\n-----------Mixer--------------");
Mixer mixer = AudioSystem.getMixer(info);
System.err.println("\nSource Lines");
//SourceLines
Arrays.asList(mixer.getSourceLineInfo()).forEach(lineInfo -> {
//Line Name
System.out.println(info.getName() + "---" + lineInfo);
Line line = null;
try {
line = mixer.getLine(lineInfo);
} catch (LineUnavailableException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("\t-----" + line);
});
System.err.println("\nTarget Lines");
//TargetLines
Arrays.asList(mixer.getTargetLineInfo()).forEach(lineInfo -> {
//Line Name
System.out.println(mixer + "---" + lineInfo);
Line line = null;
try {
line = mixer.getLine(lineInfo);
} catch (LineUnavailableException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("\t-----" + line);
});
});
}
public static void main(String[] args) {
new GoogleSpeechTest();
}
}
@goxr3plus Can the new Speech API on Google Cloud be used commercially?
Yes, you can use the official Google library commercially :) Check their repo.
By the way, I have played with Google's official Speech API library, and I find it pretty tricky for junior developers. You need to be good at Java to understand it, unlike this repository, which makes it very simple.
Thanks @goxr3plus. So your project is based only on the Chromium Speech API key and has not yet added support for the official Google Cloud Speech API? By the way, is GoogleSpeechTest.java demo code for using the official Google Cloud Speech API?
By the way, Is it free use the official Google Library for commercial? I want to use speech to text api free. Thanks.
Yes the official one can be used Commercially. My examples on the read me are for this repository. The code you see above is for the official Google Speech API.
On Mon, Mar 4, 2019, 13:37 DorisGM notifications@github.com wrote:
By the way, Is it free use the official Google Library for commercial? I want to use speech to text api free. Thanks.
— You are receiving this because you were mentioned. Reply to this email directly, view it on GitHub https://github.com/goxr3plus/java-google-speech-api/issues/3#issuecomment-469221809, or mute the thread https://github.com/notifications/unsubscribe-auth/ATbiwJBLDZPTT2Rsi7FTldFIuYy8OyGbks5vTQWSgaJpZM4S6p6P .
Please search for Google Cloud Speech-to-Text.
On Mon, Mar 4, 2019, 19:23 GoXR3Plus Studio cralexcomp@gmail.com wrote:
Yes the official one can be used Commercially. My examples on the read me are for this repository. The code you see above is for the official Google Speech API.
On Mon, Mar 4, 2019, 13:37 DorisGM notifications@github.com wrote:
By the way, Is it free use the official Google Library for commercial? I want to use speech to text api free. Thanks.
— You are receiving this because you were mentioned. Reply to this email directly, view it on GitHub https://github.com/goxr3plus/java-google-speech-api/issues/3#issuecomment-469221809, or mute the thread https://github.com/notifications/unsubscribe-auth/ATbiwJBLDZPTT2Rsi7FTldFIuYy8OyGbks5vTQWSgaJpZM4S6p6P .
What exactly can I use commercially? I didn't really understand that. Can I use your code commercially, or do I have to look for another Google speech library? I'm a little bit confused now, sorry. So what exactly can I use for commercial purposes?
Well Google used to have two apis.
Private Speech Api and the other one for commercial use. This library supports the private speech api, you should go to the official Google Speech Api Library and that above example is written for it :)
Sorry sometimes I'm a little bit slow with understanding. My English isn't so good. So when I understood this right I can't use the java-google-speech-api for commercial use, can I? But the library under this link https://github.com/googleapis/java-speech is for commercial use, right? And for what exactly is your script, you wrote above?
So when I wrote this library there was not any official library for Java :).
Now that it exists I would definitely go with that.
The script above is for using the library you linked, assuming it still works, since it has been a while.
:)
Ok but the library I get to with the link is for commercial use right? Sorry if I'm annoying but thank you for your answers!
A further question: I don't see where your code receives the answer from the server. Could you tell me the line where this happens? And could you please answer the question in my previous comment? Thank you very much!
@Jochen-sys No you are not annoying don't worry.
It's been 3 years since I last played with Google Speech Recognition, and many things have changed. Please follow the official documentation for new examples and ask questions about that on their repository.
That's how i did it back then :)
https://github.com/googleapis/java-speech
Open issues to them asking what is not working for you etc . For any further help if i know i would gladly be here . I am a React and React Native developer now :)
This project is based on Chromium Speech API key.That API has a lot stricter limits than the new Speech API on Google Cloud (which is also free).
We have to add support for the Official Google Cloud Speech API . I don't know if this would be hard or not but i know it should be done .
Google is releasing its own library for that, though it is still very much in alpha; check here