Merged
19 changes: 8 additions & 11 deletions speech/cloud-client/README.rst
@@ -221,25 +221,22 @@ To run this sample:

$ python beta_snippets.py

usage: beta_snippets.py [-h] command path [first] [second]
usage: beta_snippets.py [-h] command

Google Cloud Speech API sample that demonstrates enhanced models
and recognition metadata.

Example usage:
python beta_snippets.py enhanced-model resources/commercial_mono.wav
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
python beta_snippets.py multi-language resources/multi.wav en-US es
python beta_snippets.py word-level-conf resources/commercial_mono.wav
python beta_snippets.py enhanced-model
python beta_snippets.py metadata
python beta_snippets.py punctuation
python beta_snippets.py diarization
python beta_snippets.py multi-channel
python beta_snippets.py multi-language
python beta_snippets.py word-level-conf

positional arguments:
command
path File for audio file to be recognized
first First language in audio file to be recognized
second Second language in audio file to be recognized

optional arguments:
-h, --help show this help message and exit
98 changes: 50 additions & 48 deletions speech/cloud-client/beta_snippets.py
@@ -18,34 +18,34 @@
and recognition metadata.

Example usage:
python beta_snippets.py enhanced-model resources/commercial_mono.wav
python beta_snippets.py metadata resources/commercial_mono.wav
python beta_snippets.py punctuation resources/commercial_mono.wav
python beta_snippets.py diarization resources/commercial_mono.wav
python beta_snippets.py multi-channel resources/commercial_mono.wav
python beta_snippets.py multi-language resources/multi.wav en-US es
python beta_snippets.py word-level-conf resources/commercial_mono.wav
python beta_snippets.py enhanced-model
python beta_snippets.py metadata
python beta_snippets.py punctuation
python beta_snippets.py diarization
python beta_snippets.py multi-channel
python beta_snippets.py multi-language
python beta_snippets.py word-level-conf
"""

import argparse
import io


def transcribe_file_with_enhanced_model(speech_file):
def transcribe_file_with_enhanced_model():
"""Transcribe the given audio file using an enhanced model."""
# [START speech_transcribe_file_with_enhanced_model]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'
speech_file = 'resources/commercial_mono.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

audio = speech.types.RecognitionAudio(content=content)
config = speech.types.RecognitionConfig(
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=8000,
language_code='en-US',
# Enhanced models are only available to projects that
# opt in for audio data collection.
@@ -63,14 +63,13 @@ def transcribe_file_with_enhanced_model(speech_file):
# [END speech_transcribe_file_with_enhanced_model]


def transcribe_file_with_metadata(speech_file):
def transcribe_file_with_metadata():
"""Send a request that includes recognition metadata."""
# [START speech_transcribe_file_with_metadata]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'
speech_file = 'resources/commercial_mono.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()
@@ -94,6 +93,7 @@ def transcribe_file_with_metadata(speech_file):
audio = speech.types.RecognitionAudio(content=content)
config = speech.types.RecognitionConfig(
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=8000,
language_code='en-US',
# Add this in the request to send metadata.
metadata=metadata)
@@ -108,21 +108,21 @@ def transcribe_file_with_metadata(speech_file):
# [END speech_transcribe_file_with_metadata]


def transcribe_file_with_auto_punctuation(speech_file):
def transcribe_file_with_auto_punctuation():
"""Transcribe the given audio file with auto punctuation enabled."""
# [START speech_transcribe_file_with_auto_punctuation]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'
speech_file = 'resources/commercial_mono.wav'

with io.open(speech_file, 'rb') as audio_file:
content = audio_file.read()

audio = speech.types.RecognitionAudio(content=content)
config = speech.types.RecognitionConfig(
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=8000,
language_code='en-US',
# Enable automatic punctuation
enable_automatic_punctuation=True)
@@ -137,14 +137,13 @@ def transcribe_file_with_auto_punctuation(speech_file):
# [END speech_transcribe_file_with_auto_punctuation]


def transcribe_file_with_diarization(speech_file):
def transcribe_file_with_diarization():
"""Transcribe the given audio file synchronously with diarization."""
# [START speech_transcribe_diarization]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'
speech_file = 'resources/commercial_mono.wav'

with open(speech_file, 'rb') as audio_file:
content = audio_file.read()
@@ -153,30 +152,42 @@ def transcribe_file_with_diarization(speech_file):

config = speech.types.RecognitionConfig(
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=8000,
language_code='en-US',
enable_speaker_diarization=True,
diarization_speaker_count=2)

print('Waiting for operation to complete...')
Review comment (Contributor): Consider using the Python logging facility. Understandably, for this sample it might be overkill, so take it or leave it.

Review comment (Contributor): FWIW, nearly all our other Python samples do print(). It's true that it's not always the recommended practice in production, but it's easy to understand. With logging there's always the risk that the developer has some weird config where the logs end up where they don't expect.
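
A minimal sketch of the logging-based variant the first comment suggests (illustrative only; the sample itself keeps print()):

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Plays the role of print('Waiting for operation to complete...'):
    logger.info('Waiting for operation to complete...')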

response = client.recognize(config, audio)

# response.results contains partial results with the last item
# containing the entire result:
Review comment (Contributor): Mm... not quite. The transcript within each result is separate and sequential per result. However, the words list within an alternative (for whatever reason) includes all the words from all the results thus far. Thus, to get all the words with speaker tags, you only have to take the words list from the last result.

Review comment (Contributor Author): I see. Thanks for the clarification. Let me update the comment.

Review comment (Contributor): I'm not really understanding your comment, @jerjou, but this sounds like something that needs to be documented in the cloud.google.com docs, with a briefer explanation in the sample itself.
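
To illustrate the behavior described above (a sketch based on the reviewer's description, reusing the response object from the snippet; not code from this PR):

    # Each result carries its own sequential chunk of the transcript...
    for result in response.results:
        print(result.alternatives[0].transcript)

    # ...but the words list of the last result accumulates every word
    # (with its speaker tag) seen so far, so reading it once suffices:
    all_words = response.results[-1].alternatives[0].words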

result = response.results[-1]
Review comment (Contributor): A comment here explaining why you're only taking the last result (instead of all of them) would probably be helpful.

Review comment (Contributor Author): Good idea. Adding it.


words_info = result.alternatives[0].words
pieces = ['%s (%s)' % (word_info.word, word_info.speaker_tag)
for word_info in words_info]
print(' '.join(pieces))

# Separating the words by who said what:
speakers_words = []
for word_info in words_info:
if speakers_words and speakers_words[-1][0] == word_info.speaker_tag:
Review comment (Contributor): If I understand correctly what this piece of the for loop is doing (building a list of words per speaker tag), isn't it better to use a hash map? Does this loop do what it's supposed to do?

Review comment (Contributor): Jerjou is already reviewing this code, so I should stop reviewing. You are already in great hands :)

speakers_words[-1][1].append(word_info.word)
else:
speakers_words.append((word_info.speaker_tag, [word_info.word, ]))
Review comment (Contributor): This is a bit hard to read. An intermediate variable and a namedtuple would go a long way toward making this clearer:

    import collections

    Speaker = collections.namedtuple('Speaker', ['tag', 'words'])
    speaker_words = [Speaker(tag=0, words=[])]
    for word_info in words_info:
        current_speaker = speaker_words[-1]
        if current_speaker.tag == word_info.speaker_tag:
            current_speaker.words.append(word_info.word)
        else:
            speaker_words.append(
                Speaker(tag=word_info.speaker_tag, words=[word_info.word]))

Review comment (Contributor): Also, maybe speaker_sequence or something, to make it clear it's not just a speaker->words_they_spoke mapping, and is actually the conversation / words spoken in sequence.

Review comment (Contributor Author, @happyhuman, Jul 20, 2018): Interesting idea. However, I think the readability gain mainly comes from the intermediate Speaker(tag=0, words=[]) placed as the first element in the list, which lets us define current_speaker every time. The downside is that we are introducing a new object that is not returned by the API, and we will have to handle it separately in the next step (either by removing it or by skipping it), which reduces the code's cleanness in another way. So, while I am not against the suggested solution, I am also not sure it really helps readability that much. What do you think, @jerjou?

Review comment (Contributor Author, @happyhuman, Jul 20, 2018): What about this:

    speakers = []
    words = []
    for word_info in words_info:
        if (not speakers) or speakers[-1] != word_info.speaker_tag:
            speakers.append(word_info.speaker_tag)
            words.append([])
        words[-1].append(word_info.word)

I think this is more readable than the current code, without introducing the intermediate variable.
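
Yet another option (a sketch, not code from this PR): itertools.groupby groups consecutive words by speaker tag, which keeps the turn order of the conversation that a plain dict keyed by speaker would lose:

    import itertools

    # Each group is one uninterrupted turn by a single speaker.
    for tag, turn in itertools.groupby(
            words_info, key=lambda info: info.speaker_tag):
        print('Speaker #{}: {}'.format(
            tag, ' '.join(info.word for info in turn)))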


# Printing the output based on who said what:
for speaker_tag, words in speakers_words:
print('Speaker #{}: {}'.format(speaker_tag, ' '.join(words)))
# [END speech_transcribe_diarization]


def transcribe_file_with_multichannel(speech_file):
def transcribe_file_with_multichannel():
"""Transcribe the given audio file synchronously with
multi channel."""
# [START speech_transcribe_multichannel]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'
speech_file = 'resources/Google_Gnome.wav'

with open(speech_file, 'rb') as audio_file:
content = audio_file.read()
@@ -201,17 +212,16 @@ def transcribe_file_with_multichannel(speech_file):
# [END speech_transcribe_multichannel]


def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
def transcribe_file_with_multilanguage():
"""Transcribe the given audio file synchronously with
multi language."""
# [START speech_transcribe_multilanguage]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'
# first_lang = first language code, e.g. 'en-US'
# second_lang = second language code, e.g. 'es'
speech_file = 'resources/multi.wav'
first_lang = 'en-US'
second_lang = 'es'

with open(speech_file, 'rb') as audio_file:
content = audio_file.read()
@@ -220,6 +230,7 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):

config = speech.types.RecognitionConfig(
encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=44100,
audio_channel_count=2,
language_code=first_lang,
alternative_language_codes=[second_lang])
@@ -235,15 +246,14 @@ def transcribe_file_with_multilanguage(speech_file, first_lang, second_lang):
# [END speech_transcribe_multilanguage]


def transcribe_file_with_word_level_confidence(speech_file):
def transcribe_file_with_word_level_confidence():
"""Transcribe the given audio file synchronously with
word level confidence."""
# [START speech_transcribe_word_level_confidence]
from google.cloud import speech_v1p1beta1 as speech
client = speech.SpeechClient()

# TODO(developer): Uncomment and set to a path to your audio file.
# speech_file = 'path/to/file.wav'
speech_file = 'resources/Google_Gnome.wav'

with open(speech_file, 'rb') as audio_file:
content = audio_file.read()
@@ -273,28 +283,20 @@ def transcribe_file_with_word_level_confidence(speech_file):
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('command')
parser.add_argument(
'path', help='File for audio file to be recognized')
parser.add_argument(
'first', help='First language in audio file to be recognized',
nargs='?')
parser.add_argument(
'second', help='Second language in audio file to be recognized',
nargs='?')

args = parser.parse_args()

if args.command == 'enhanced-model':
transcribe_file_with_enhanced_model(args.path)
transcribe_file_with_enhanced_model()
elif args.command == 'metadata':
transcribe_file_with_metadata(args.path)
transcribe_file_with_metadata()
elif args.command == 'punctuation':
transcribe_file_with_auto_punctuation(args.path)
transcribe_file_with_auto_punctuation()
elif args.command == 'diarization':
transcribe_file_with_diarization(args.path)
transcribe_file_with_diarization()
elif args.command == 'multi-channel':
transcribe_file_with_multichannel(args.path)
transcribe_file_with_multichannel()
elif args.command == 'multi-language':
transcribe_file_with_multilanguage(args.path, args.first, args.second)
Review comment (Contributor): For future reference, argparse's sub-commands feature would be helpful to avoid having args that only matter for one command or another.

Review comment (Contributor Author): Sounds good.
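
A minimal sketch of that sub-commands approach (hypothetical; not part of this PR), so that only the commands that need language arguments define them:

    import argparse

    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers(dest='command')

    # Commands that take no extra arguments:
    subparsers.add_parser('diarization')

    # Only this sub-command declares language arguments:
    multi = subparsers.add_parser('multi-language')
    multi.add_argument('first', help='First language code, e.g. en-US')
    multi.add_argument('second', help='Second language code, e.g. es')

    args = parser.parse_args()
    print(args.command)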

transcribe_file_with_multilanguage()
elif args.command == 'word-level-conf':
transcribe_file_with_word_level_confidence(args.path)
transcribe_file_with_word_level_confidence()
23 changes: 8 additions & 15 deletions speech/cloud-client/beta_snippets_test.py
@@ -26,56 +26,49 @@


def test_transcribe_file_with_enhanced_model(capsys):
transcribe_file_with_enhanced_model(
os.path.join(RESOURCES, 'commercial_mono.wav'))
transcribe_file_with_enhanced_model()
out, _ = capsys.readouterr()

assert 'Chrome' in out


def test_transcribe_file_with_metadata(capsys):
transcribe_file_with_metadata(
os.path.join(RESOURCES, 'commercial_mono.wav'))
transcribe_file_with_metadata()
out, _ = capsys.readouterr()

assert 'Chrome' in out


def test_transcribe_file_with_auto_punctuation(capsys):
transcribe_file_with_auto_punctuation(
os.path.join(RESOURCES, 'commercial_mono.wav'))
transcribe_file_with_auto_punctuation()
out, _ = capsys.readouterr()

assert 'Okay. Sure.' in out


def test_transcribe_diarization(capsys):
transcribe_file_with_diarization(
os.path.join(RESOURCES, 'commercial_mono.wav'))
transcribe_file_with_diarization()
out, err = capsys.readouterr()

assert "I'm (1) here (1) hi (2)" in out
assert "Speaker #1: I'm here" in out


def test_transcribe_multichannel_file(capsys):
transcribe_file_with_multichannel(
os.path.join(RESOURCES, 'Google_Gnome.wav'))
transcribe_file_with_multichannel()
out, err = capsys.readouterr()

assert 'OK Google stream stranger things from Netflix to my TV' in out


def test_transcribe_multilanguage_file(capsys):
transcribe_file_with_multilanguage(
os.path.join(RESOURCES, 'multi.wav'), 'en-US', 'es')
transcribe_file_with_multilanguage()
out, err = capsys.readouterr()

assert 'how are you doing estoy bien e tu' in out


def test_transcribe_word_level_confidence(capsys):
transcribe_file_with_word_level_confidence(
os.path.join(RESOURCES, 'Google_Gnome.wav'))
transcribe_file_with_word_level_confidence()
out, err = capsys.readouterr()

assert 'OK Google stream stranger things from Netflix to my TV' in out
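
These are ordinary pytest functions (note the capsys fixture), so assuming GOOGLE_APPLICATION_CREDENTIALS is set and the resources/ files are present, something like the following should run them:

    $ pytest beta_snippets_test.py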