From 8e0c33b59e6bc0c9d003ebf41cab5ec8c105a165 Mon Sep 17 00:00:00 2001
From: nirupa-kumar <nirupakumar@google.com>
Date: Thu, 16 Aug 2018 15:35:59 -0700
Subject: [PATCH 1/4] Microphone streaming with a 1 minute duration.

---
 speech/cloud-client/README.md                 |   5 +
 .../java/com/example/speech/Recognize.java    | 119 +++++++++++++++++-
 2 files changed, 119 insertions(+), 5 deletions(-)
diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md
index 9a7055f32ff..98d6c69cfc4 100644
--- a/speech/cloud-client/README.md
+++ b/speech/cloud-client/README.md
@@ -92,6 +92,11 @@ Performing streaming speech transcription and punctuation on an audio file
 mvn exec:java -DRecognize -Dexec.args="stream-punctuation ./resources/audio.raw"
 ```
 
+Perform streaming speech recognition on microphone input (stops after one minute)
+```
+mvn exec:java -DRecognize -Dexec.args="micstreamrecognize"
+```
+
 ## Enhanced Model
 Transcribe an audio file using an enhanced model
 ```
diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
index 9771ad2a8e9..29cd1f45712 100644
--- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
+++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -19,6 +19,9 @@
 import com.google.api.gax.longrunning.OperationFuture;
 import com.google.api.gax.rpc.ApiStreamObserver;
 import com.google.api.gax.rpc.BidiStreamingCallable;
+import com.google.api.gax.rpc.ClientStream;
+import com.google.api.gax.rpc.ResponseObserver;
+import com.google.api.gax.rpc.StreamController;
 import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata;
 import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse;
 import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
@@ -47,6 +50,13 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.DataLine;
+import javax.sound.sampled.DataLine.Info;
+import javax.sound.sampled.TargetDataLine;
+
 public class Recognize {
 
   /** Run speech recognition tasks. */
@@ -56,7 +66,7 @@ public static void main(String... args) throws Exception {
       System.out.printf(
           "\tjava %s \"<command>\" \"<path-to-image>\"\n"
               + "Commands:\n"
-              + "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets\n"
+              + "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize | wordoffsets\n"
               + "\t| model-selection | auto-punctuation | stream-punctuation | enhanced-model\n"
               + "\t| metadata | diarization | multi-channel | multi-language | word-level-conf"
               + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
@@ -88,6 +98,8 @@ public static void main(String... args) throws Exception {
       }
     } else if (command.equals("streamrecognize")) {
       streamingRecognizeFile(path);
+    } else if (command.equals("micstreamrecognize")) {
+      streamingMicRecognize();
     } else if (command.equals("model-selection")) {
       if (path.startsWith("gs://")) {
         transcribeModelSelectionGcs(path);
@@ -704,6 +716,101 @@ public SettableFuture<List<T>> future() {
   }
   // [END speech_stream_recognize_punctuation]
 
+  // [START speech_streaming_mic_recognize]
+
+  /**
+   * Performs microphone streaming speech recognition with a duration of 1 minute.
+   *
+   * @throws Exception
+   */
+  public static void streamingMicRecognize() throws Exception {
+    AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
+    DataLine.Info targetInfo = new Info(TargetDataLine.class, audioFormat);
+    TargetDataLine targetDataLine;
+    int BYTES_PER_BUFFER = 6400; // buffer size in bytes
+    int durationMillSec = 60 * 1000; // 60 seconds
+    if (!AudioSystem.isLineSupported(targetInfo)) {
+      System.out.println("Microphone not supported");
+      System.exit(0);
+    }
+
+    ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
+    try (SpeechClient client = SpeechClient.create()) {
+
+      responseObserver =
+          new ResponseObserver<StreamingRecognizeResponse>() {
+            ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();
+
+            public void onStart(StreamController controller) {}
+
+            public void onResponse(StreamingRecognizeResponse response) {
+              responses.add(response);
+            }
+
+            public void onComplete() {
+              for (StreamingRecognizeResponse response : responses) {
+                StreamingRecognitionResult result = response.getResultsList().get(0);
+                SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+                System.out.printf("Transcript : %s\n", alternative.getTranscript());
+              }
+            }
+
+            public void onError(Throwable t) {
+              System.out.println(t);
+            }
+          };
+
+      ClientStream<StreamingRecognizeRequest> clientStream =
+          client.streamingRecognizeCallable().splitCall(responseObserver);
+
+      RecognitionConfig recConfig =
+          RecognitionConfig.newBuilder()
+              .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+              .setLanguageCode("en-US")
+              .setSampleRateHertz(16000)
+              .build();
+      StreamingRecognitionConfig config =
+          StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
+
+      StreamingRecognizeRequest request =
+          StreamingRecognizeRequest.newBuilder()
+              .setStreamingConfig(config)
+              .build(); // The first request in a streaming call has to be a config
+
+      clientStream.send(request);
+
+      // Get the target data line
+      targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
+      targetDataLine.open(audioFormat);
+      targetDataLine.start();
+      System.out.println("Start speaking");
+      long startTime = System.currentTimeMillis();
+      // Audio Input Stream
+      AudioInputStream audio = new AudioInputStream(targetDataLine);
+      while (true) {
+        long estimatedTime = System.currentTimeMillis() - startTime;
+        byte[] data = new byte[BYTES_PER_BUFFER];
+        audio.read(data);
+        if (estimatedTime > durationMillSec) {
+          System.out.println("Stop speaking.");
+          targetDataLine.stop();
+          targetDataLine.close();
+          break;
+        }
+        request =
+            StreamingRecognizeRequest.newBuilder()
+                .setAudioContent(ByteString.copyFrom(data))
+                .build();
+        clientStream.send(request);
+      }
+    } catch (Exception e) {
+      System.out.println(e);
+    }
+    responseObserver.onComplete();
+  }
+
+  // [END speech_streaming_mic_recognize]
+
   // [START speech_transcribe_file_with_enhanced_model]
   /**
    * Transcribe the given audio file using an enhanced model.
@@ -833,8 +940,9 @@ public static void transcribeDiarization(String fileName) throws Exception {
         SpeechRecognitionAlternative alternative = result.getAlternatives(0);
         System.out.format("Transcript : %s\n", alternative.getTranscript());
         // The words array contains the entire transcript up until that point.
-        //Referencing the last spoken word to get the associated Speaker tag
-        System.out.format("Speaker Tag %s: %s\n",
+        // Referencing the last spoken word to get the associated Speaker tag
+        System.out.format(
+            "Speaker Tag %s: %s\n",
             alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
             alternative.getTranscript());
       }
@@ -877,8 +985,9 @@ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
         // use the first (most likely) one here.
         SpeechRecognitionAlternative alternative = result.getAlternatives(0);
         // The words array contains the entire transcript up until that point.
-        //Referencing the last spoken word to get the associated Speaker tag
-        System.out.format("Speaker Tag %s:%s\n",
+        // Referencing the last spoken word to get the associated Speaker tag
+        System.out.format(
+            "Speaker Tag %s:%s\n",
             alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
             alternative.getTranscript());
       }

From 7b6b10806aacba05453e79519910668696517dcc Mon Sep 17 00:00:00 2001
From: nirupa-kumar <nirupakumar@google.com>
Date: Thu, 16 Aug 2018 15:58:43 -0700
Subject: [PATCH 2/4] Wrap usage text; drop empty @throws and buffer-size local.

---
 .../src/main/java/com/example/speech/Recognize.java   | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
index 29cd1f45712..61dbd1b95aa 100644
--- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
+++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -66,9 +66,10 @@ public static void main(String... args) throws Exception {
       System.out.printf(
           "\tjava %s \"<command>\" \"<path-to-image>\"\n"
               + "Commands:\n"
-              + "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize | wordoffsets\n"
-              + "\t| model-selection | auto-punctuation | stream-punctuation | enhanced-model\n"
-              + "\t| metadata | diarization | multi-channel | multi-language | word-level-conf"
+              + "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
+              + "\t| wordoffsets | model-selection | auto-punctuation | stream-punctuation \n"
+              + "\t| enhanced-model| metadata | diarization | multi-channel | multi-language \n"
+              + "\t | word-level-conf\n"
               + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
               + "for a Cloud Storage resource (gs://...)\n",
           Recognize.class.getCanonicalName());
@@ -721,13 +722,11 @@ public SettableFuture<List<T>> future() {
   /**
    * Performs microphone streaming speech recognition with a duration of 1 minute.
    *
-   * @throws Exception
    */
   public static void streamingMicRecognize() throws Exception {
     AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
     DataLine.Info targetInfo = new Info(TargetDataLine.class, audioFormat);
     TargetDataLine targetDataLine;
-    int BYTES_PER_BUFFER = 6400; // buffer size in bytes
     int durationMillSec = 60 * 1000; // 60 seconds
     if (!AudioSystem.isLineSupported(targetInfo)) {
       System.out.println("Microphone not supported");
@@ -789,7 +788,7 @@ public void onError(Throwable t) {
       AudioInputStream audio = new AudioInputStream(targetDataLine);
       while (true) {
         long estimatedTime = System.currentTimeMillis() - startTime;
-        byte[] data = new byte[BYTES_PER_BUFFER];
+        byte[] data = new byte[6400];
         audio.read(data);
         if (estimatedTime > durationMillSec) {
           System.out.println("Stop speaking.");

From 838f7a3133f2dcdb5434e91efa00b2b2f5c3f7c6 Mon Sep 17 00:00:00 2001
From: nirupa-kumar <nirupakumar@google.com>
Date: Fri, 17 Aug 2018 11:10:09 -0700
Subject: [PATCH 3/4] Move mic setup into try block; rename config locals.

---
 .../java/com/example/speech/Recognize.java    | 39 +++++++++----------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
index 61dbd1b95aa..fa29233313c 100644
--- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
+++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -718,20 +718,8 @@ public SettableFuture<List<T>> future() {
   // [END speech_stream_recognize_punctuation]
 
   // [START speech_streaming_mic_recognize]
-
-  /**
-   * Performs microphone streaming speech recognition with a duration of 1 minute.
-   *
-   */
+  /** Performs microphone streaming speech recognition with a duration of 1 minute. */
   public static void streamingMicRecognize() throws Exception {
-    AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
-    DataLine.Info targetInfo = new Info(TargetDataLine.class, audioFormat);
-    TargetDataLine targetDataLine;
-    int durationMillSec = 60 * 1000; // 60 seconds
-    if (!AudioSystem.isLineSupported(targetInfo)) {
-      System.out.println("Microphone not supported");
-      System.exit(0);
-    }
 
     ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
     try (SpeechClient client = SpeechClient.create()) {
@@ -762,23 +750,35 @@ public void onError(Throwable t) {
       ClientStream<StreamingRecognizeRequest> clientStream =
           client.streamingRecognizeCallable().splitCall(responseObserver);
 
-      RecognitionConfig recConfig =
+      RecognitionConfig recognitionConfig =
           RecognitionConfig.newBuilder()
               .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
               .setLanguageCode("en-US")
               .setSampleRateHertz(16000)
               .build();
-      StreamingRecognitionConfig config =
-          StreamingRecognitionConfig.newBuilder().setConfig(recConfig).build();
+      StreamingRecognitionConfig streamingRecognitionConfig =
+          StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
 
       StreamingRecognizeRequest request =
           StreamingRecognizeRequest.newBuilder()
-              .setStreamingConfig(config)
+              .setStreamingConfig(streamingRecognitionConfig)
               .build(); // The first request in a streaming call has to be a config
 
       clientStream.send(request);
-
-      // Get the target data line
+      // SampleRate:16000Hz, SampleSizeInBits: 16, Number of channels: 1, Signed: true,
+      // bigEndian: false
+      AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
+      DataLine.Info targetInfo =
+          new Info(
+              TargetDataLine.class,
+              audioFormat); // Set the system information to read from the microphone audio stream
+      TargetDataLine targetDataLine;
+      int durationMillSec = 60 * 1000; // 60 seconds
+      if (!AudioSystem.isLineSupported(targetInfo)) {
+        System.out.println("Microphone not supported");
+        System.exit(0);
+      }
+      //Target data line captures the audio stream the microphone produces.
       targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
       targetDataLine.open(audioFormat);
       targetDataLine.start();
@@ -807,7 +807,6 @@ public void onError(Throwable t) {
     }
     responseObserver.onComplete();
   }
-
   // [END speech_streaming_mic_recognize]
 
   // [START speech_transcribe_file_with_enhanced_model]

From 5cf46b90e8c395d646ff32c7d92e6b68fd1c21d2 Mon Sep 17 00:00:00 2001
From: nirupa-kumar <nirupakumar@google.com>
Date: Fri, 17 Aug 2018 11:46:40 -0700
Subject: [PATCH 4/4] Inline targetDataLine declaration and 60-second limit.

---
 .../src/main/java/com/example/speech/Recognize.java      | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
index fa29233313c..15beaba3e1a 100644
--- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
+++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -772,14 +772,13 @@ public void onError(Throwable t) {
           new Info(
               TargetDataLine.class,
               audioFormat); // Set the system information to read from the microphone audio stream
-      TargetDataLine targetDataLine;
-      int durationMillSec = 60 * 1000; // 60 seconds
+
       if (!AudioSystem.isLineSupported(targetInfo)) {
         System.out.println("Microphone not supported");
         System.exit(0);
       }
-      //Target data line captures the audio stream the microphone produces.
-      targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
+      // Target data line captures the audio stream the microphone produces.
+      TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
       targetDataLine.open(audioFormat);
       targetDataLine.start();
       System.out.println("Start speaking");
@@ -790,7 +789,7 @@ public void onError(Throwable t) {
         long estimatedTime = System.currentTimeMillis() - startTime;
         byte[] data = new byte[6400];
         audio.read(data);
-        if (estimatedTime > durationMillSec) {
+        if (estimatedTime > 60000) { // 60 seconds
           System.out.println("Stop speaking.");
           targetDataLine.stop();
           targetDataLine.close();