diff --git a/TESTING.md b/TESTING.md index c6eb66675d5..7ca66a3ed0c 100644 --- a/TESTING.md +++ b/TESTING.md @@ -24,12 +24,6 @@ For example, to install portaudio: If you have trouble installing a necessary system library, try filing an issue. -### Troubleshooting - -#### PortAudio on OS X - -See [speech/api/README.md](speech/api/README.md). - ## Preparing a project for testing Most tests require you to have an active, billing-enabled project on the diff --git a/speech/api-client/README.rst b/speech/api-client/README.rst deleted file mode 100644 index 12e1dc1d186..00000000000 --- a/speech/api-client/README.rst +++ /dev/null @@ -1,125 +0,0 @@ -.. This file is automatically generated. Do not edit this file directly. - -Google Cloud Speech API Python Samples -=============================================================================== - -This directory contains samples for Google Cloud Speech API. `Google Cloud Speech API`_ enables easy integration of Google speech recognition technologies into developer applications. Send audio and receive a text transcription from the Cloud Speech API service. - - - - -.. _Google Cloud Speech API: https://cloud.google.com/speech/docs - -Setup -------------------------------------------------------------------------------- - - -Authentication -++++++++++++++ - -Authentication is typically done through `Application Default Credentials`_, -which means you do not have to change the code to authenticate as long as -your environment has credentials. You have a few options for setting up -authentication: - -#. When running locally, use the `Google Cloud SDK`_ - - .. code-block:: bash - - gcloud beta auth application-default login - - -#. When running on App Engine or Compute Engine, credentials are already - set-up. However, you may need to configure your Compute Engine instance - with `additional scopes`_. - -#. You can create a `Service Account key file`_. This file can be used to - authenticate to Google Cloud Platform services from any environment. To use - the file, set the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable to - the path to the key file, for example: - - .. code-block:: bash - - export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account.json - -.. _Application Default Credentials: https://cloud.google.com/docs/authentication#getting_credentials_for_server-centric_flow -.. _additional scopes: https://cloud.google.com/compute/docs/authentication#using -.. _Service Account key file: https://developers.google.com/identity/protocols/OAuth2ServiceAccount#creatinganaccount - -Install Dependencies -++++++++++++++++++++ - -#. Install `pip`_ and `virtualenv`_ if you do not already have them. - -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. - - .. code-block:: bash - - $ virtualenv env - $ source env/bin/activate - -#. Install the dependencies needed to run the samples. - - .. code-block:: bash - - $ pip install -r requirements.txt - -.. _pip: https://pip.pypa.io/ -.. _virtualenv: https://virtualenv.pypa.io/ - -Samples -------------------------------------------------------------------------------- - -Transcribe -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - - -To run this sample: - -.. code-block:: bash - - $ python transcribe.py - - usage: transcribe.py [-h] speech_file - - Google Cloud Speech API sample application using the REST API for batch - processing. - - Example usage: python transcribe.py resources/audio.raw - - positional arguments: - speech_file Full path of audio file to be recognized - - optional arguments: - -h, --help show this help message and exit - - -Transcribe async -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - - -To run this sample: - -.. code-block:: bash - - $ python transcribe_async.py - - usage: transcribe_async.py [-h] speech_file - - Google Cloud Speech API sample application using the REST API for async - batch processing. - - Example usage: python transcribe.py resources/audio.raw - - positional arguments: - speech_file Full path of audio file to be recognized - - optional arguments: - -h, --help show this help message and exit - - - - -.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/speech/api-client/README.rst.in b/speech/api-client/README.rst.in deleted file mode 100644 index 667b30debe0..00000000000 --- a/speech/api-client/README.rst.in +++ /dev/null @@ -1,22 +0,0 @@ -# This file is used to generate README.rst - -product: - name: Google Cloud Speech API - short_name: Cloud Speech API - url: https://cloud.google.com/speech/docs - description: > - `Google Cloud Speech API`_ enables easy integration of Google speech - recognition technologies into developer applications. Send audio and - receive a text transcription from the Cloud Speech API service. - -setup: -- auth -- install_deps - -samples: -- name: Transcribe - file: transcribe.py - show_help: true -- name: Transcribe async - file: transcribe_async.py - show_help: true diff --git a/speech/api-client/requirements.txt b/speech/api-client/requirements.txt deleted file mode 100644 index 4f77d6936d7..00000000000 --- a/speech/api-client/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -google-api-python-client==1.6.2 diff --git a/speech/api-client/resources/audio.raw b/speech/api-client/resources/audio.raw deleted file mode 100644 index 5ebf79d3c9c..00000000000 Binary files a/speech/api-client/resources/audio.raw and /dev/null differ diff --git a/speech/api-client/resources/audio2.raw b/speech/api-client/resources/audio2.raw deleted file mode 100644 index 35413b78817..00000000000 Binary files a/speech/api-client/resources/audio2.raw and /dev/null differ diff --git a/speech/api-client/transcribe.py b/speech/api-client/transcribe.py deleted file mode 100644 index 54a6ea18e4a..00000000000 --- a/speech/api-client/transcribe.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Google Cloud Speech API sample application using the REST API for batch -processing. - -Example usage: python transcribe.py resources/audio.raw -""" - -# [START import_libraries] -import argparse -import base64 -import json - -import googleapiclient.discovery -# [END import_libraries] - - -# [START authenticating] -# Application default credentials provided by env variable -# GOOGLE_APPLICATION_CREDENTIALS -def get_speech_service(): - return googleapiclient.discovery.build('speech', 'v1beta1') -# [END authenticating] - - -def main(speech_file): - """Transcribe the given audio file. - - Args: - speech_file: the name of the audio file. - """ - # [START construct_request] - with open(speech_file, 'rb') as speech: - # Base64 encode the binary audio file for inclusion in the JSON - # request. - speech_content = base64.b64encode(speech.read()) - - service = get_speech_service() - service_request = service.speech().syncrecognize( - body={ - 'config': { - # There are a bunch of config options you can specify. See - # https://goo.gl/KPZn97 for the full list. - 'encoding': 'LINEAR16', # raw 16-bit signed LE samples - 'sampleRate': 16000, # 16 khz - # See http://g.co/cloud/speech/docs/languages for a list of - # supported languages. - 'languageCode': 'en-US', # a BCP-47 language tag - }, - 'audio': { - 'content': speech_content.decode('UTF-8') - } - }) - # [END construct_request] - # [START send_request] - response = service_request.execute() - - # First print the raw json response - print(json.dumps(response, indent=2)) - - # Now print the actual transcriptions - for result in response.get('results', []): - print('Result:') - for alternative in result['alternatives']: - print(u' Alternative: {}'.format(alternative['transcript'])) - # [END send_request] - - -# [START run_application] -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument( - 'speech_file', help='Full path of audio file to be recognized') - args = parser.parse_args() - main(args.speech_file) - # [END run_application] diff --git a/speech/api-client/transcribe_async.py b/speech/api-client/transcribe_async.py deleted file mode 100644 index 662a34744c0..00000000000 --- a/speech/api-client/transcribe_async.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Google Cloud Speech API sample application using the REST API for async -batch processing. - -Example usage: python transcribe.py resources/audio.raw -""" - -# [START import_libraries] -import argparse -import base64 -import json -import time - -import googleapiclient.discovery -# [END import_libraries] - - -# [START authenticating] - - -# Application default credentials provided by env variable -# GOOGLE_APPLICATION_CREDENTIALS -def get_speech_service(): - return googleapiclient.discovery.build('speech', 'v1beta1') -# [END authenticating] - - -def main(speech_file): - """Transcribe the given audio file asynchronously. - - Args: - speech_file: the name of the audio file. - """ - # [START construct_request] - with open(speech_file, 'rb') as speech: - # Base64 encode the binary audio file for inclusion in the request. - speech_content = base64.b64encode(speech.read()) - - service = get_speech_service() - service_request = service.speech().asyncrecognize( - body={ - 'config': { - # There are a bunch of config options you can specify. See - # https://goo.gl/KPZn97 for the full list. - 'encoding': 'LINEAR16', # raw 16-bit signed LE samples - 'sampleRate': 16000, # 16 khz - # See http://g.co/cloud/speech/docs/languages for a list of - # supported languages. - 'languageCode': 'en-US', # a BCP-47 language tag - }, - 'audio': { - 'content': speech_content.decode('UTF-8') - } - }) - # [END construct_request] - # [START send_request] - response = service_request.execute() - print(json.dumps(response)) - # [END send_request] - - name = response['name'] - # Construct a GetOperation request. - service_request = service.operations().get(name=name) - - while True: - # Give the server a few seconds to process. - print('Waiting for server processing...') - time.sleep(1) - # Get the long running operation with response. - response = service_request.execute() - - if 'done' in response and response['done']: - break - - # First print the raw json response - print(json.dumps(response['response'], indent=2)) - - # Now print the actual transcriptions - for result in response['response'].get('results', []): - print('Result:') - for alternative in result['alternatives']: - print(u' Alternative: {}'.format(alternative['transcript'])) - - -# [START run_application] -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument( - 'speech_file', help='Full path of audio file to be recognized') - args = parser.parse_args() - main(args.speech_file) - # [END run_application] diff --git a/speech/api-client/transcribe_async_test.py b/speech/api-client/transcribe_async_test.py deleted file mode 100644 index ab147ae9fec..00000000000 --- a/speech/api-client/transcribe_async_test.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2016, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re - -from transcribe_async import main - -RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') - - -def test_main(capsys): - main(os.path.join(RESOURCES, 'audio.raw')) - out, err = capsys.readouterr() - - assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) diff --git a/speech/api-client/transcribe_test.py b/speech/api-client/transcribe_test.py deleted file mode 100644 index 9e34d9d2062..00000000000 --- a/speech/api-client/transcribe_test.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright 2016, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re - -from transcribe import main - -RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') - - -def test_main(capsys): - main(os.path.join(RESOURCES, 'audio.raw')) - out, err = capsys.readouterr() - - assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) diff --git a/speech/grpc/README.rst b/speech/grpc/README.rst deleted file mode 100644 index 519963b72cd..00000000000 --- a/speech/grpc/README.rst +++ /dev/null @@ -1,188 +0,0 @@ -.. This file is automatically generated. Do not edit this file directly. - -Google Cloud Speech API Python Samples -=============================================================================== - -This directory contains samples for Google Cloud Speech API. `Google Cloud Speech API`_ enables easy integration of Google speech recognition technologies into developer applications. Send audio and receive a text transcription from the Cloud Speech API service. - - - - -.. _Google Cloud Speech API: https://cloud.google.com/speech/docs - -Setup -------------------------------------------------------------------------------- - - -Authentication -++++++++++++++ - -Authentication is typically done through `Application Default Credentials`_, -which means you do not have to change the code to authenticate as long as -your environment has credentials. You have a few options for setting up -authentication: - -#. When running locally, use the `Google Cloud SDK`_ - - .. code-block:: bash - - gcloud beta auth application-default login - - -#. When running on App Engine or Compute Engine, credentials are already - set-up. However, you may need to configure your Compute Engine instance - with `additional scopes`_. - -#. You can create a `Service Account key file`_. This file can be used to - authenticate to Google Cloud Platform services from any environment. To use - the file, set the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable to - the path to the key file, for example: - - .. code-block:: bash - - export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account.json - -.. _Application Default Credentials: https://cloud.google.com/docs/authentication#getting_credentials_for_server-centric_flow -.. _additional scopes: https://cloud.google.com/compute/docs/authentication#using -.. _Service Account key file: https://developers.google.com/identity/protocols/OAuth2ServiceAccount#creatinganaccount - -Install PortAudio -+++++++++++++++++ - -Install `PortAudio`_. This is required by the `PyAudio`_ library to stream -audio from your computer's microphone. PyAudio depends on PortAudio for cross-platform compatibility, and is installed differently depending on the -platform. - -* For Mac OS X, you can use `Homebrew`_:: - - brew install portaudio - - **Note**: if you encounter an error when running `pip install` that indicates - it can't find `portaudio.h`, try running `pip install` with the following - flags:: - - pip install --global-option='build_ext' \ - --global-option='-I/usr/local/include' \ - --global-option='-L/usr/local/lib' \ - pyaudio - -* For Debian / Ubuntu Linux:: - - apt-get install portaudio19-dev python-all-dev - -* Windows may work without having to install PortAudio explicitly (it will get - installed with PyAudio). - -For more details, see the `PyAudio installation`_ page. - - -.. _PyAudio: https://people.csail.mit.edu/hubert/pyaudio/ -.. _PortAudio: http://www.portaudio.com/ -.. _PyAudio installation: - https://people.csail.mit.edu/hubert/pyaudio/#downloads -.. _Homebrew: http://brew.sh - -Install Dependencies -++++++++++++++++++++ - -#. Install `pip`_ and `virtualenv`_ if you do not already have them. - -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. - - .. code-block:: bash - - $ virtualenv env - $ source env/bin/activate - -#. Install the dependencies needed to run the samples. - - .. code-block:: bash - - $ pip install -r requirements.txt - -.. _pip: https://pip.pypa.io/ -.. _virtualenv: https://virtualenv.pypa.io/ - -Samples -------------------------------------------------------------------------------- - -Transcribe -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - - -To run this sample: - -.. code-block:: bash - - $ python transcribe.py - - usage: transcribe.py [-h] [--encoding {LINEAR16,FLAC,MULAW,AMR,AMR_WB}] - [--sample_rate SAMPLE_RATE] - input_uri - - Transcribes a FLAC audio file stored in Google Cloud Storage using GRPC. - - Example usage: - python transcribe.py --encoding=FLAC --sample_rate=16000 gs://speech-demo/audio.flac - - positional arguments: - input_uri - - optional arguments: - -h, --help show this help message and exit - --encoding {LINEAR16,FLAC,MULAW,AMR,AMR_WB} - How the audio file is encoded. See https://github.com/ - googleapis/googleapis/blob/master/google/cloud/speech/ - v1beta1/cloud_speech.proto#L67 - --sample_rate SAMPLE_RATE - - -Transcribe async -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - - -To run this sample: - -.. code-block:: bash - - $ python transcribe_async.py - - usage: transcribe_async.py [-h] [--encoding {LINEAR16,FLAC,MULAW,AMR,AMR_WB}] - [--sample_rate SAMPLE_RATE] - input_uri - - Sample that transcribes a FLAC audio file stored in Google Cloud Storage, - using async GRPC. - - Example usage: - python transcribe_async.py --encoding=FLAC --sample_rate=16000 gs://speech-demo/audio.flac - - positional arguments: - input_uri - - optional arguments: - -h, --help show this help message and exit - --encoding {LINEAR16,FLAC,MULAW,AMR,AMR_WB} - How the audio file is encoded. See https://github.com/ - googleapis/googleapis/blob/master/google/cloud/speech/ - v1beta1/cloud_speech.proto#L67 - --sample_rate SAMPLE_RATE - - -Transcribe streaming -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - - - -To run this sample: - -.. code-block:: bash - - $ python transcribe_streaming.py - - - - -.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/speech/grpc/README.rst.in b/speech/grpc/README.rst.in deleted file mode 100644 index 9e036d80637..00000000000 --- a/speech/grpc/README.rst.in +++ /dev/null @@ -1,25 +0,0 @@ -# This file is used to generate README.rst - -product: - name: Google Cloud Speech API - short_name: Cloud Speech API - url: https://cloud.google.com/speech/docs - description: > - `Google Cloud Speech API`_ enables easy integration of Google speech - recognition technologies into developer applications. Send audio and - receive a text transcription from the Cloud Speech API service. - -setup: -- auth -- install_portaudio -- install_deps - -samples: -- name: Transcribe - file: transcribe.py - show_help: true -- name: Transcribe async - file: transcribe_async.py - show_help: true -- name: Transcribe streaming - file: transcribe_streaming.py diff --git a/speech/grpc/requirements.txt b/speech/grpc/requirements.txt deleted file mode 100644 index 13377b67c29..00000000000 --- a/speech/grpc/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -grpcio==1.1.0 -PyAudio==0.2.10 -proto-google-cloud-speech-v1beta1==0.15.1 -six==1.10.0 -requests==2.13.0 -google-auth==0.8.0 diff --git a/speech/grpc/resources/audio.raw b/speech/grpc/resources/audio.raw deleted file mode 100644 index 5ebf79d3c9c..00000000000 Binary files a/speech/grpc/resources/audio.raw and /dev/null differ diff --git a/speech/grpc/resources/quit.raw b/speech/grpc/resources/quit.raw deleted file mode 100644 index a01dfc45a59..00000000000 Binary files a/speech/grpc/resources/quit.raw and /dev/null differ diff --git a/speech/grpc/transcribe.py b/speech/grpc/transcribe.py deleted file mode 100644 index 399be275c47..00000000000 --- a/speech/grpc/transcribe.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/python -# Copyright (C) 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Transcribes a FLAC audio file stored in Google Cloud Storage using GRPC. - -Example usage: - python transcribe.py --encoding=FLAC --sample_rate=16000 \ - gs://speech-demo/audio.flac -""" - -import argparse - -import google.auth -import google.auth.transport.grpc -import google.auth.transport.requests -from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 - -# Keep the request alive for this many seconds -DEADLINE_SECS = 60 -SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' - - -def make_channel(host, port): - """Creates a secure channel with auth credentials from the environment.""" - # Grab application default credentials from the environment - credentials, _ = google.auth.default(scopes=[SPEECH_SCOPE]) - - # Create a secure channel using the credentials. - http_request = google.auth.transport.requests.Request() - target = '{}:{}'.format(host, port) - - return google.auth.transport.grpc.secure_authorized_channel( - credentials, http_request, target) - - -def main(input_uri, encoding, sample_rate, language_code='en-US'): - service = cloud_speech_pb2.SpeechStub( - make_channel('speech.googleapis.com', 443)) - - # The method and parameters can be inferred from the proto from which the - # grpc client lib was generated. See: - # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto - response = service.SyncRecognize(cloud_speech_pb2.SyncRecognizeRequest( - config=cloud_speech_pb2.RecognitionConfig( - # There are a bunch of config options you can specify. See - # https://goo.gl/KPZn97 for the full list. - encoding=encoding, # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB - sample_rate=sample_rate, # the rate in hertz - # See https://g.co/cloud/speech/docs/languages for a list of - # supported languages. - language_code=language_code, # a BCP-47 language tag - ), - audio=cloud_speech_pb2.RecognitionAudio( - uri=input_uri, - ) - ), DEADLINE_SECS) - - # Print the recognition result alternatives and confidence scores. - for result in response.results: - print('Result:') - for alternative in result.alternatives: - print(u' ({}): {}'.format( - alternative.confidence, alternative.transcript)) - - -def _gcs_uri(text): - if not text.startswith('gs://'): - raise ValueError( - 'Cloud Storage uri must be of the form gs://bucket/path/') - return text - - -PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/' - 'google/cloud/speech/v1beta1/cloud_speech.proto') -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('input_uri', type=_gcs_uri) - parser.add_argument( - '--encoding', default='LINEAR16', choices=[ - 'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'], - help='How the audio file is encoded. See {}#L67'.format(PROTO_URL)) - parser.add_argument('--sample_rate', type=int, default=16000) - - args = parser.parse_args() - main(args.input_uri, args.encoding, args.sample_rate) diff --git a/speech/grpc/transcribe_async.py b/speech/grpc/transcribe_async.py deleted file mode 100644 index c0cd49cfb1a..00000000000 --- a/speech/grpc/transcribe_async.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python -# Copyright (C) 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Sample that transcribes a FLAC audio file stored in Google Cloud Storage, -using async GRPC. - -Example usage: - python transcribe_async.py --encoding=FLAC --sample_rate=16000 \ - gs://speech-demo/audio.flac -""" - -import argparse -import time - -import google.auth -import google.auth.transport.grpc -import google.auth.transport.requests -from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 -from google.longrunning import operations_pb2 - -# Keep the request alive for this many seconds -DEADLINE_SECS = 10 -SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' - - -def make_channel(host, port): - """Creates a secure channel with auth credentials from the environment.""" - # Grab application default credentials from the environment - credentials, _ = google.auth.default(scopes=[SPEECH_SCOPE]) - - # Create a secure channel using the credentials. - http_request = google.auth.transport.requests.Request() - target = '{}:{}'.format(host, port) - - return google.auth.transport.grpc.secure_authorized_channel( - credentials, http_request, target) - - -def main(input_uri, encoding, sample_rate, language_code='en-US'): - channel = make_channel('speech.googleapis.com', 443) - service = cloud_speech_pb2.SpeechStub(channel) - - # The method and parameters can be inferred from the proto from which the - # grpc client lib was generated. See: - # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto - operation = service.AsyncRecognize(cloud_speech_pb2.AsyncRecognizeRequest( - config=cloud_speech_pb2.RecognitionConfig( - # There are a bunch of config options you can specify. See - # https://goo.gl/KPZn97 for the full list. - encoding=encoding, # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB - sample_rate=sample_rate, # the rate in hertz - # See https://g.co/cloud/speech/docs/languages for a list of - # supported languages. - language_code=language_code, # a BCP-47 language tag - ), - audio=cloud_speech_pb2.RecognitionAudio( - uri=input_uri, - ) - ), DEADLINE_SECS) - - # Print the longrunning operation handle. - print(operation) - - # Construct a long running operation endpoint. - service = operations_pb2.OperationsStub(channel) - - name = operation.name - - while True: - # Give the server a few seconds to process. - print('Waiting for server processing...') - time.sleep(1) - operation = service.GetOperation( - operations_pb2.GetOperationRequest(name=name), - DEADLINE_SECS) - - if operation.error.message: - print('\nOperation error:\n{}'.format(operation.error)) - - if operation.done: - break - - response = cloud_speech_pb2.AsyncRecognizeResponse() - operation.response.Unpack(response) - # Print the recognition result alternatives and confidence scores. - for result in response.results: - print('Result:') - for alternative in result.alternatives: - print(u' ({}): {}'.format( - alternative.confidence, alternative.transcript)) - - -def _gcs_uri(text): - if not text.startswith('gs://'): - raise argparse.ArgumentTypeError( - 'Cloud Storage uri must be of the form gs://bucket/path/') - return text - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('input_uri', type=_gcs_uri) - parser.add_argument( - '--encoding', default='LINEAR16', choices=[ - 'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'], - help='How the audio file is encoded. See {}#L67'.format( - 'https://github.com/googleapis/googleapis/blob/master/' - 'google/cloud/speech/v1beta1/cloud_speech.proto')) - parser.add_argument('--sample_rate', type=int, default=16000) - - args = parser.parse_args() - main(args.input_uri, args.encoding, args.sample_rate) diff --git a/speech/grpc/transcribe_async_test.py b/speech/grpc/transcribe_async_test.py deleted file mode 100644 index 6a9563483f9..00000000000 --- a/speech/grpc/transcribe_async_test.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2016, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re - -from transcribe_async import main - -BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] - - -def test_main(capsys): - # Run the transcribe sample on audio.raw, verify correct results - storage_uri = 'gs://{}/speech/audio.raw'.format(BUCKET) - main(storage_uri, 'LINEAR16', 16000) - out, err = capsys.readouterr() - assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I) diff --git a/speech/grpc/transcribe_streaming.py b/speech/grpc/transcribe_streaming.py deleted file mode 100644 index 33f3991638d..00000000000 --- a/speech/grpc/transcribe_streaming.py +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/python -# Copyright (C) 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Sample that streams audio to the Google Cloud Speech API via GRPC.""" - -from __future__ import division - -import contextlib -import functools -import re -import signal -import sys - - -import google.auth -import google.auth.transport.grpc -import google.auth.transport.requests -from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 -from google.rpc import code_pb2 -import grpc -import pyaudio -from six.moves import queue - -# Audio recording parameters -RATE = 16000 -CHUNK = int(RATE / 10) # 100ms - -# The Speech API has a streaming limit of 60 seconds of audio*, so keep the -# connection alive for that long, plus some more to give the API time to figure -# out the transcription. -# * https://g.co/cloud/speech/limits#content -DEADLINE_SECS = 60 * 3 + 5 -SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' - - -def make_channel(host, port): - """Creates a secure channel with auth credentials from the environment.""" - # Grab application default credentials from the environment - credentials, _ = google.auth.default(scopes=[SPEECH_SCOPE]) - - # Create a secure channel using the credentials. - http_request = google.auth.transport.requests.Request() - target = '{}:{}'.format(host, port) - - return google.auth.transport.grpc.secure_authorized_channel( - credentials, http_request, target) - - -def _audio_data_generator(buff): - """A generator that yields all available data in the given buffer. - - Args: - buff - a Queue object, where each element is a chunk of data. - Yields: - A chunk of data that is the aggregate of all chunks of data in `buff`. - The function will block until at least one data chunk is available. - """ - stop = False - while not stop: - # Use a blocking get() to ensure there's at least one chunk of data. - data = [buff.get()] - - # Now consume whatever other data's still buffered. - while True: - try: - data.append(buff.get(block=False)) - except queue.Empty: - break - - # `None` in the buffer signals that the audio stream is closed. Yield - # the final bit of the buffer and exit the loop. - if None in data: - stop = True - data.remove(None) - - yield b''.join(data) - - -def _fill_buffer(buff, in_data, frame_count, time_info, status_flags): - """Continuously collect data from the audio stream, into the buffer.""" - buff.put(in_data) - return None, pyaudio.paContinue - - -# [START audio_stream] -@contextlib.contextmanager -def record_audio(rate, chunk): - """Opens a recording stream in a context manager.""" - # Create a thread-safe buffer of audio data - buff = queue.Queue() - - audio_interface = pyaudio.PyAudio() - audio_stream = audio_interface.open( - format=pyaudio.paInt16, - # The API currently only supports 1-channel (mono) audio - # https://goo.gl/z757pE - channels=1, rate=rate, - input=True, frames_per_buffer=chunk, - # Run the audio stream asynchronously to fill the buffer object. - # This is necessary so that the input device's buffer doesn't overflow - # while the calling thread makes network requests, etc. - stream_callback=functools.partial(_fill_buffer, buff), - ) - - yield _audio_data_generator(buff) - - audio_stream.stop_stream() - audio_stream.close() - # Signal the _audio_data_generator to finish - buff.put(None) - audio_interface.terminate() -# [END audio_stream] - - -def request_stream(data_stream, rate, interim_results=True): - """Yields `StreamingRecognizeRequest`s constructed from a recording audio - stream. - - Args: - data_stream: A generator that yields raw audio data to send. - rate: The sampling rate in hertz. - interim_results: Whether to return intermediate results, before the - transcription is finalized. - """ - # The initial request must contain metadata about the stream, so the - # server knows how to interpret it. - recognition_config = cloud_speech_pb2.RecognitionConfig( - # There are a bunch of config options you can specify. See - # https://goo.gl/KPZn97 for the full list. - encoding='LINEAR16', # raw 16-bit signed LE samples - sample_rate=rate, # the rate in hertz - # See http://g.co/cloud/speech/docs/languages - # for a list of supported languages. - language_code='en-US', # a BCP-47 language tag - ) - streaming_config = cloud_speech_pb2.StreamingRecognitionConfig( - interim_results=interim_results, - config=recognition_config, - ) - - yield cloud_speech_pb2.StreamingRecognizeRequest( - streaming_config=streaming_config) - - for data in data_stream: - # Subsequent requests can all just have the content - yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=data) - - -def listen_print_loop(recognize_stream): - """Iterates through server responses and prints them. - - The recognize_stream passed is a generator that will block until a response - is provided by the server. When the transcription response comes, print it. - - In this case, responses are provided for interim results as well. If the - response is an interim one, print a line feed at the end of it, to allow - the next result to overwrite it, until the response is a final one. For the - final one, print a newline to preserve the finalized transcription. - """ - num_chars_printed = 0 - for resp in recognize_stream: - if resp.error.code != code_pb2.OK: - raise RuntimeError('Server error: ' + resp.error.message) - - if not resp.results: - continue - - # Display the top transcription - result = resp.results[0] - transcript = result.alternatives[0].transcript - - # Display interim results, but with a carriage return at the end of the - # line, so subsequent lines will overwrite them. - # - # If the previous result was longer than this one, we need to print - # some extra spaces to overwrite the previous result - overwrite_chars = ' ' * max(0, num_chars_printed - len(transcript)) - - if not result.is_final: - sys.stdout.write(transcript + overwrite_chars + '\r') - sys.stdout.flush() - - num_chars_printed = len(transcript) - - else: - print(transcript + overwrite_chars) - - # Exit recognition if any of the transcribed phrases could be - # one of our keywords. - if re.search(r'\b(exit|quit)\b', transcript, re.I): - print('Exiting..') - break - - num_chars_printed = 0 - - -def main(): - service = cloud_speech_pb2.SpeechStub( - make_channel('speech.googleapis.com', 443)) - - # For streaming audio from the microphone, there are three threads. - # First, a thread that collects audio data as it comes in - with record_audio(RATE, CHUNK) as buffered_audio_data: - # Second, a thread that sends requests with that data - requests = request_stream(buffered_audio_data, RATE) - # Third, a thread that listens for transcription responses - recognize_stream = service.StreamingRecognize( - requests, DEADLINE_SECS) - - # Exit things cleanly on interrupt - signal.signal(signal.SIGINT, lambda *_: recognize_stream.cancel()) - - # Now, put the transcription responses to use. - try: - listen_print_loop(recognize_stream) - - recognize_stream.cancel() - except grpc.RpcError as e: - code = e.code() - # CANCELLED is caused by the interrupt handler, which is expected. - if code is not code.CANCELLED: - raise - - -if __name__ == '__main__': - main() diff --git a/speech/grpc/transcribe_streaming_minute.py b/speech/grpc/transcribe_streaming_minute.py deleted file mode 100644 index 22b137a5ea8..00000000000 --- a/speech/grpc/transcribe_streaming_minute.py +++ /dev/null @@ -1,298 +0,0 @@ -#!/usr/bin/python - -# Copyright (C) 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Sample that streams audio to the Google Cloud Speech API via GRPC. - -This sample expands on transcribe_streaming.py to work around the 1-minute -limit on streaming requests. It does this by transcribing normally until -WRAP_IT_UP_SECS seconds before the 1-minute limit. At that point, it waits for -the end of an utterance and once it hears it, it closes the current stream and -opens a new one. It also keeps a buffer of audio around while this is -happening, that it sends to the new stream in its initial request, to minimize -losing any speech that occurs while this happens. - -Note that you could do this a little more simply by simply re-starting the -stream after every utterance, though this increases the possibility of audio -being missed between streams. For learning purposes (and robustness), the more -complex implementation is shown here. -""" - -from __future__ import division - -import argparse -import collections -import contextlib -import functools -import logging -import re -import signal -import sys -import time - -import google.auth -import google.auth.transport.grpc -import google.auth.transport.requests -from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 -from google.rpc import code_pb2 -import grpc -import pyaudio -from six.moves import queue - -# Seconds you have to wrap up your utterance -WRAP_IT_UP_SECS = 15 -SECS_OVERLAP = 1 - -# Audio recording parameters -RATE = 16000 -CHUNK = int(RATE / 10) # 100ms - -# The Speech API has a streaming limit of 60 seconds of audio*, so keep the -# connection alive for that long, plus some more to give the API time to figure -# out the transcription. -# * https://g.co/cloud/speech/limits#content -DEADLINE_SECS = 60 * 3 + 5 -SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' - - -def make_channel(host): - """Creates a secure channel with auth credentials from the environment.""" - # Grab application default credentials from the environment - credentials, _ = google.auth.default(scopes=[SPEECH_SCOPE]) - - # Create a secure channel using the credentials. - http_request = google.auth.transport.requests.Request() - - return google.auth.transport.grpc.secure_authorized_channel( - credentials, http_request, host) - - -def _audio_data_generator(buff, overlap_buffer): - """A generator that yields all available data in the given buffer. - - Args: - buff (Queue): A Queue where each element is a chunk of data. - overlap_buffer (deque): a ring buffer for storing trailing data chunks - Yields: - bytes: A chunk of data that is the aggregate of all chunks of data in - `buff`. The function will block until at least one data chunk is - available. - """ - if overlap_buffer: - yield b''.join(overlap_buffer) - overlap_buffer.clear() - - while True: - # Use a blocking get() to ensure there's at least one chunk of data. - data = [buff.get()] - - # Now consume whatever other data's still buffered. - while True: - try: - data.append(buff.get(block=False)) - except queue.Empty: - break - - # `None` in the buffer signals that we should stop generating. Put the - # data back into the buffer for the next generator. - if None in data: - data.remove(None) - if data: - buff.put(b''.join(data)) - break - else: - overlap_buffer.extend(data) - - yield b''.join(data) - - -def _fill_buffer(buff, in_data, frame_count, time_info, status_flags): - """Continuously collect data from the audio stream, into the buffer.""" - buff.put(in_data) - return None, pyaudio.paContinue - - -# [START audio_stream] -@contextlib.contextmanager -def record_audio(rate, chunk): - """Opens a recording stream in a context manager.""" - # Create a thread-safe buffer of audio data - buff = queue.Queue() - - audio_interface = pyaudio.PyAudio() - audio_stream = audio_interface.open( - format=pyaudio.paInt16, - # The API currently only supports 1-channel (mono) audio - # https://goo.gl/z757pE - channels=1, rate=rate, - input=True, frames_per_buffer=chunk, - # Run the audio stream asynchronously to fill the buffer object. - # This is necessary so that the input device's buffer doesn't overflow - # while the calling thread makes network requests, etc. - stream_callback=functools.partial(_fill_buffer, buff), - ) - - yield buff - - audio_stream.stop_stream() - audio_stream.close() - # Signal the _audio_data_generator to finish - buff.put(None) - audio_interface.terminate() -# [END audio_stream] - - -def request_stream(data_stream, rate, interim_results=True): - """Yields `StreamingRecognizeRequest`s constructed from a recording audio - stream. - - Args: - data_stream (generator): The raw audio data to send. - rate (int): The sampling rate in hertz. - interim_results (boolean): Whether to return intermediate results, - before the transcription is finalized. - """ - # The initial request must contain metadata about the stream, so the - # server knows how to interpret it. - recognition_config = cloud_speech_pb2.RecognitionConfig( - # There are a bunch of config options you can specify. See - # https://goo.gl/KPZn97 for the full list. - encoding='LINEAR16', # raw 16-bit signed LE samples - sample_rate=rate, # the rate in hertz - # See http://g.co/cloud/speech/docs/languages - # for a list of supported languages. - language_code='en-US', # a BCP-47 language tag - ) - streaming_config = cloud_speech_pb2.StreamingRecognitionConfig( - interim_results=interim_results, - config=recognition_config, - ) - - yield cloud_speech_pb2.StreamingRecognizeRequest( - streaming_config=streaming_config) - - for data in data_stream: - # Subsequent requests can all just have the content - yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=data) - - -def listen_print_loop( - recognize_stream, wrap_it_up_secs, buff, max_recog_secs=60): - """Iterates through server responses and prints them. - - The recognize_stream passed is a generator that will block until a response - is provided by the server. When the transcription response comes, print it. - - In this case, responses are provided for interim results as well. If the - response is an interim one, print a line feed at the end of it, to allow - the next result to overwrite it, until the response is a final one. For the - final one, print a newline to preserve the finalized transcription. - """ - # What time should we switch to a new stream? - time_to_switch = time.time() + max_recog_secs - wrap_it_up_secs - graceful_exit = False - num_chars_printed = 0 - for resp in recognize_stream: - if resp.error.code != code_pb2.OK: - raise RuntimeError('Server error: ' + resp.error.message) - - if not resp.results: - if resp.endpointer_type is resp.END_OF_SPEECH and ( - time.time() > time_to_switch): - graceful_exit = True - buff.put(None) - continue - - # Display the top transcription - result = resp.results[0] - transcript = result.alternatives[0].transcript - - # If the previous result was longer than this one, we need to print - # some extra spaces to overwrite the previous result - overwrite_chars = ' ' * max(0, num_chars_printed - len(transcript)) - - # Display interim results, but with a carriage return at the end of the - # line, so subsequent lines will overwrite them. - if not result.is_final: - sys.stdout.write(transcript + overwrite_chars + '\r') - sys.stdout.flush() - - num_chars_printed = len(transcript) - - else: - print(transcript + overwrite_chars) - - # Exit recognition if any of the transcribed phrases could be - # one of our keywords. - if re.search(r'\b(exit|quit)\b', transcript, re.I): - print('Exiting..') - recognize_stream.cancel() - - elif graceful_exit: - break - - num_chars_printed = 0 - - -def main(): - service = cloud_speech_pb2.SpeechStub( - make_channel('speech.googleapis.com')) - - # For streaming audio from the microphone, there are three threads. - # First, a thread that collects audio data as it comes in - with record_audio(RATE, CHUNK) as buff: - # Second, a thread that sends requests with that data - overlap_buffer = collections.deque( - maxlen=int(SECS_OVERLAP * RATE / CHUNK)) - requests = request_stream( - _audio_data_generator(buff, overlap_buffer), RATE) - # Third, a thread that listens for transcription responses - recognize_stream = service.StreamingRecognize( - requests, DEADLINE_SECS) - - # Exit things cleanly on interrupt - signal.signal(signal.SIGINT, lambda *_: recognize_stream.cancel()) - - # Now, put the transcription responses to use. - try: - while True: - listen_print_loop(recognize_stream, WRAP_IT_UP_SECS, buff) - - # Discard this stream and create a new one. - # Note: calling .cancel() doesn't immediately raise an RpcError - # - it only raises when the iterator's next() is requested - recognize_stream.cancel() - - logging.debug('Starting new stream') - requests = request_stream(_audio_data_generator( - buff, overlap_buffer), RATE) - recognize_stream = service.StreamingRecognize( - requests, DEADLINE_SECS) - - except grpc.RpcError: - # This happens because of the interrupt handler - pass - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument( - '-v', '--verbose', help='increase output verbosity', - action='store_true') - args = parser.parse_args() - if args.verbose: - logging.basicConfig(level=logging.DEBUG) - - main() diff --git a/speech/grpc/transcribe_streaming_minute_test.py b/speech/grpc/transcribe_streaming_minute_test.py deleted file mode 100644 index 9a165fcf264..00000000000 --- a/speech/grpc/transcribe_streaming_minute_test.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2016, Google, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import re -import threading -import time - -import transcribe_streaming_minute as transcribe_streaming - -RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') - - -class MockPyAudio(object): - def __init__(self, *audio_filenames): - self.audio_filenames = audio_filenames - - def __call__(self, *args): - return self - - def open(self, stream_callback, *args, **kwargs): - self.closed = threading.Event() - self.stream_thread = threading.Thread( - target=self.stream_audio, args=( - self.audio_filenames, stream_callback, self.closed)) - self.stream_thread.start() - return self - - def close(self): - self.closed.set() - - def stop_stream(self): - pass - - def terminate(self): - pass - - @staticmethod - def stream_audio(audio_filenames, callback, closed, num_frames=512): - # audio is 16-bit samples, whereas python byte is 8-bit - num_bytes = 2 * num_frames - # Approximate realtime by sleeping for the appropriate time for the - # requested number of frames - sleep_secs = num_frames / float(transcribe_streaming.RATE) - - for audio_filename in audio_filenames: - with open(audio_filename, 'rb') as audio_file: - # While the audio stream hasn't been closed, give it chunks of - # the audio file, until we run out of audio file. - while not closed.is_set(): - chunk = audio_file.read(num_bytes) - if not chunk: - break - time.sleep(sleep_secs) - callback(chunk, None, None, None) - else: - break - - # Ran out of audio data. Give a second of silence between files - for _ in range(int(1 + 1 / sleep_secs)): - if closed.is_set(): - break - time.sleep(sleep_secs) - callback(b'\0' * num_bytes, None, None, None) - else: - # No more audio left. Just give silence until we're done - while not closed.is_set(): - time.sleep(sleep_secs) - callback(b'\0' * num_bytes, None, None, None) - - -def test_main(monkeypatch, capsys, caplog): - caplog.setLevel(logging.DEBUG) - monkeypatch.setattr( - transcribe_streaming.pyaudio, 'PyAudio', - MockPyAudio( - os.path.join(RESOURCES, 'audio.raw'), - os.path.join(RESOURCES, 'quit.raw'))) - monkeypatch.setattr( - transcribe_streaming, 'WRAP_IT_UP_SECS', 59) - - transcribe_streaming.main() - out, err = capsys.readouterr() - - assert re.search( - r'old is the.*quit', out, re.DOTALL | re.I) - assert 'Starting new stream' in caplog.text() diff --git a/speech/grpc/transcribe_streaming_test.py b/speech/grpc/transcribe_streaming_test.py deleted file mode 100644 index 3b8d697a180..00000000000 --- a/speech/grpc/transcribe_streaming_test.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2016, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re -import threading -import time - -import transcribe_streaming - -RESOURCES = os.path.join(os.path.dirname(__file__), 'resources') - - -class MockPyAudio(object): - def __init__(self, audio_filename): - self.audio_filename = audio_filename - - def __call__(self, *args): - return self - - def open(self, stream_callback, *args, **kwargs): - self.closed = threading.Event() - self.stream_thread = threading.Thread( - target=self.stream_audio, args=( - self.audio_filename, stream_callback, self.closed)) - self.stream_thread.start() - return self - - def close(self): - self.closed.set() - - def stop_stream(self): - pass - - def terminate(self): - pass - - @staticmethod - def stream_audio(audio_filename, callback, closed, num_frames=512): - with open(audio_filename, 'rb') as audio_file: - while not closed.is_set(): - # Approximate realtime by sleeping for the appropriate time for - # the requested number of frames - time.sleep(num_frames / float(transcribe_streaming.RATE)) - # audio is 16-bit samples, whereas python byte is 8-bit - num_bytes = 2 * num_frames - chunk = audio_file.read(num_bytes) or b'\0' * num_bytes - callback(chunk, None, None, None) - - -def test_main(monkeypatch, capsys): - monkeypatch.setattr( - transcribe_streaming.pyaudio, 'PyAudio', - MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))) - - transcribe_streaming.main() - out, err = capsys.readouterr() - - assert re.search(r'quit', out, re.DOTALL | re.I) diff --git a/speech/grpc/transcribe_test.py b/speech/grpc/transcribe_test.py deleted file mode 100644 index 2826bf6e2a5..00000000000 --- a/speech/grpc/transcribe_test.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2016, Google, Inc. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import re - -from transcribe import main - -BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] - - -def test_main(capsys): - # Run the transcribe sample on audio.raw, verify correct results - storage_uri = 'gs://{}/speech/audio.raw'.format(BUCKET) - main(storage_uri, 'LINEAR16', 16000) - out, err = capsys.readouterr() - assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)