Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion speech/cloud-client/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ You can then run a given `ClassName` via:
### Transcribe a remote audio file (using the recognize sample)
```
mvn exec:java -Dexec.mainClass=com.example.speech.Recognize \
-Dexec.args="syncrecognize 'gs://java-docs-samples-tests/speech/brooklyn.flac'"
-Dexec.args="syncrecognize 'gs://cloud-samples-tests/speech/brooklyn.flac'"
```
4 changes: 2 additions & 2 deletions speech/cloud-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@
<!-- [START dependencies] -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-speech</artifactId>
<version>0.8.1-alpha</version>
<artifactId>google-cloud</artifactId>
<version>0.11.2-alpha</version>
</dependency>
<!-- [END dependencies] -->

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@

// [START speech_quickstart]
// Imports the Google Cloud client library
import com.google.cloud.speech.spi.v1beta1.SpeechClient;
import com.google.cloud.speech.v1beta1.RecognitionAudio;
import com.google.cloud.speech.v1beta1.RecognitionConfig;
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
import com.google.cloud.speech.spi.v1.SpeechClient;
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import com.google.protobuf.ByteString;

import java.nio.file.Files;
Expand All @@ -48,14 +48,15 @@ public static void main(String... args) throws Exception {
// Builds the sync recognize request
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setSampleRate(16000)
.setSampleRateHertz(16000)
.setLanguageCode("en-US")
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setContent(audioBytes)
.build();

// Performs speech recognition on the audio file
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
RecognizeResponse response = speech.recognize(config, audio);
List<SpeechRecognitionResult> results = response.getResultsList();

for (SpeechRecognitionResult result: results) {
Expand Down
138 changes: 119 additions & 19 deletions speech/cloud-client/src/main/java/com/example/speech/Recognize.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@

package com.example.speech;

import com.google.api.gax.core.ApiStreamObserver;
import com.google.api.gax.grpc.OperationFuture;
import com.google.cloud.speech.spi.v1beta1.SpeechClient;
import com.google.cloud.speech.v1beta1.AsyncRecognizeResponse;
import com.google.cloud.speech.v1beta1.RecognitionAudio;
import com.google.cloud.speech.v1beta1.RecognitionConfig;
import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1beta1.SpeechRecognitionResult;
import com.google.cloud.speech.v1beta1.SyncRecognizeResponse;
import com.google.api.gax.grpc.StreamingCallable;
import com.google.cloud.speech.spi.v1.SpeechClient;
import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognitionConfig;
import com.google.cloud.speech.v1.StreamingRecognitionResult;
import com.google.cloud.speech.v1.StreamingRecognizeRequest;
import com.google.cloud.speech.v1.StreamingRecognizeResponse;
import com.google.common.util.concurrent.SettableFuture;
import com.google.protobuf.ByteString;

import java.io.IOException;
Expand All @@ -40,7 +47,7 @@ public static void main(String... args) throws Exception {
System.out.printf(
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand All @@ -62,7 +69,11 @@ public static void main(String... args) throws Exception {
} else {
asyncRecognizeFile(path);
}
} else if (command.equals("streamrecognize")) {
streamingRecognizeFile(path);
//streamingRecognizeEasy(path);
}

}

/**
Expand All @@ -80,14 +91,15 @@ public static void syncRecognizeFile(String fileName) throws Exception, IOExcept
// Configure request with local raw PCM audio
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setSampleRate(16000)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setContent(audioBytes)
.build();

// Use blocking call to get audio transcript
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
RecognizeResponse response = speech.recognize(config, audio);
List<SpeechRecognitionResult> results = response.getResultsList();

for (SpeechRecognitionResult result: results) {
Expand All @@ -111,14 +123,15 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException
// Builds the request for remote FLAC file
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.FLAC)
.setSampleRate(16000)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setUri(gcsUri)
.build();

// Use blocking call for getting audio transcript
SyncRecognizeResponse response = speech.syncRecognize(config, audio);
RecognizeResponse response = speech.recognize(config, audio);
List<SpeechRecognitionResult> results = response.getResultsList();

for (SpeechRecognitionResult result: results) {
Expand All @@ -130,6 +143,7 @@ public static void syncRecognizeGcs(String gcsUri) throws Exception, IOException
speech.close();
}

/*
/**
* Performs non-blocking speech recognition on raw PCM audio and prints
* the transcription.
Expand All @@ -147,14 +161,16 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
// Configure request with local raw PCM audio
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setSampleRate(16000)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setContent(audioBytes)
.build();

// Use non-blocking call for getting file transcription
OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
OperationFuture<LongRunningRecognizeResponse> response =
speech.longRunningRecognizeAsync(config, audio);
while (!response.isDone()) {
System.out.println("Waiting for response...");
Thread.sleep(200);
Expand All @@ -175,23 +191,25 @@ public static void asyncRecognizeFile(String fileName) throws Exception, IOExcep
* Performs non-blocking speech recognition on remote FLAC file and prints
* the transcription.
*
* @param gcsUri the path to the remote FLAC audio file to transcribe.
* @param gcsUri the path to the remote LINEAR16 audio file to transcribe.
*/
public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOException {
// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
SpeechClient speech = SpeechClient.create();

// Configure remote file request for FLAC file
// Configure remote file request for Linear16
RecognitionConfig config = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.FLAC)
.setSampleRate(16000)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
RecognitionAudio audio = RecognitionAudio.newBuilder()
.setUri(gcsUri)
.build();

// Use non-blocking call for getting file transcription
OperationFuture<AsyncRecognizeResponse> response = speech.asyncRecognizeAsync(config, audio);
OperationFuture<LongRunningRecognizeResponse> response =
speech.longRunningRecognizeAsync(config, audio);
while (!response.isDone()) {
System.out.println("Waiting for response...");
Thread.sleep(200);
Expand All @@ -207,4 +225,86 @@ public static void asyncRecognizeGcs(String gcsUri) throws Exception, IOExceptio
}
speech.close();
}

/**
* Performs streaming speech recognition on raw PCM audio data.
*
* @param fileName the path to a PCM audio file to transcribe.
*/
public static void streamingRecognizeFile(String fileName) throws Exception, IOException {
Path path = Paths.get(fileName);
byte[] data = Files.readAllBytes(path);

// Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
SpeechClient speech = SpeechClient.create();

// Configure request with local raw PCM audio
RecognitionConfig recConfig = RecognitionConfig.newBuilder()
.setEncoding(AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
.setSampleRateHertz(16000)
.build();
StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
.setConfig(recConfig)
.build();

class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
private final SettableFuture<List<T>> future = SettableFuture.create();
private final List<T> messages = new java.util.ArrayList<T>();

@Override
public void onNext(T message) {
messages.add(message);
}

@Override
public void onError(Throwable t) {
future.setException(t);
}

@Override
public void onCompleted() {
future.set(messages);
}

// Returns the SettableFuture object to get received messages / exceptions.
public SettableFuture<List<T>> future() {
return future;
}
}

ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
new ResponseApiStreamingObserver<StreamingRecognizeResponse>();

StreamingCallable<StreamingRecognizeRequest,StreamingRecognizeResponse> callable =
speech.streamingRecognizeCallable();

ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
callable.bidiStreamingCall(responseObserver);

// The first request must **only** contain the audio configuration:
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(config)
.build());

// Subsequent requests must **only** contain the audio data.
requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
.setAudioContent(ByteString.copyFrom(data))
.build());

// Mark transmission as completed after sending the data.
requestObserver.onCompleted();

List<StreamingRecognizeResponse> responses = responseObserver.future().get();

for (StreamingRecognizeResponse response: responses) {
for (StreamingRecognitionResult result: response.getResultsList()) {
for (SpeechRecognitionAlternative alternative : result.getAlternativesList()) {
System.out.println(alternative.getTranscript());
}
}
}
speech.close();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -61,25 +61,32 @@ public void testRecognizeFile() throws Exception {
String got = bout.toString();
assertThat(got).contains("how old is the Brooklyn Bridge");
}

@Test
public void testRecognizeGcs() throws Exception {
Recognize.syncRecognizeGcs(gcsPath);
String got = bout.toString();
assertThat(got).contains("how old is the Brooklyn Bridge");
}

@Test
public void testAsyncRecognizeFile() throws Exception {
Recognize.asyncRecognizeFile(fileName);
String got = bout.toString();
assertThat(got).contains("how old is the Brooklyn Bridge");
}

@Test
public void testAsyncRecognizeGcs() throws Exception {
Recognize.asyncRecognizeGcs(gcsPath);
String got = bout.toString();
assertThat(got).contains("how old is the Brooklyn Bridge");
}

@Test
public void testStreamRecognize() throws Exception {
Recognize.streamingRecognizeFile(fileName);
String got = bout.toString();
assertThat(got).contains("how old is the Brooklyn Bridge");
}
}