Skip to content

Commit 3744103

Browse files
lukesneeringer authored and dhermes committed
Speech GAPIC to master (#3607)
* Vendor the GAPIC for Speech. * Speech Partial Veneer (#3483) * Update to docs based on @dhermes catch. * Fix incorrect variable. * Fix the docs. * Style fixes to unit tests. * More PR review from me.
1 parent 5d49402 commit 3744103

File tree

24 files changed

+2435
-155
lines changed

24 files changed

+2435
-155
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Declare this package as a pkg_resources-style namespace package.
# ``__import__`` is used as an expression so that no ``pkg_resources`` name
# is bound in this module's namespace.
__import__('pkg_resources').declare_namespace(__name__)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Declare this package as a pkg_resources-style namespace package.
# ``__import__`` is used as an expression so that no ``pkg_resources`` name
# is bound in this module's namespace.
__import__('pkg_resources').declare_namespace(__name__)

packages/google-cloud-speech/google/cloud/gapic/speech/v1/__init__.py

Whitespace-only changes.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2016 Google Inc. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""Wrappers for protocol buffer enum types."""
15+
16+
17+
class RecognitionConfig(object):
    # Container for the enum wrappers that belong to ``RecognitionConfig``.

    class AudioEncoding(object):
        """Encoding of the audio data sent in the audio message.

        Every encoding supports only 1 channel (mono) audio. ``FLAC`` is the
        only encoding that includes a header describing the bytes of audio
        that follow it; the other encodings are raw audio bytes with no
        header.

        Capture and transmit the audio source with a lossless encoding
        (``FLAC`` or ``LINEAR16``) for best results. The other codecs listed
        here are lossy, and using them to capture or transmit the audio may
        reduce recognition accuracy, particularly if background noise is
        present.

        Attributes:
          ENCODING_UNSPECIFIED (int): Not specified. Will return result
            ``google.rpc.Code.INVALID_ARGUMENT``.
          LINEAR16 (int): Uncompressed 16-bit signed little-endian samples
            (Linear PCM).
          FLAC (int): `FLAC <https://xiph.org/flac/documentation.html>`_
            (Free Lossless Audio Codec) is the recommended encoding because
            it is lossless--therefore recognition is not compromised--and
            requires only about half the bandwidth of ``LINEAR16``. ``FLAC``
            stream encoding supports 16-bit and 24-bit samples, however, not
            all fields in ``STREAMINFO`` are supported.
          MULAW (int): 8-bit samples that compand 14-bit audio samples using
            G.711 PCMU/mu-law.
          AMR (int): Adaptive Multi-Rate Narrowband codec.
            ``sample_rate_hertz`` must be 8000.
          AMR_WB (int): Adaptive Multi-Rate Wideband codec.
            ``sample_rate_hertz`` must be 16000.
          OGG_OPUS (int): Opus encoded audio frames in Ogg container
            (`OggOpus <https://wiki.xiph.org/OggOpus>`_).
            ``sample_rate_hertz`` must be 16000.
          SPEEX_WITH_HEADER_BYTE (int): Although the use of lossy encodings
            is not recommended, if a very low bitrate encoding is required,
            ``OGG_OPUS`` is highly preferred over Speex encoding. The
            `Speex <https://speex.org/>`_ encoding supported by Cloud Speech
            API has a header byte in each block, as in MIME type
            ``audio/x-speex-with-header-byte``. It is a variant of the RTP
            Speex encoding defined in
            `RFC 5574 <https://tools.ietf.org/html/rfc5574>`_. The stream is
            a sequence of blocks, one block per RTP packet. Each block starts
            with a byte containing the length of the block, in bytes,
            followed by one or more frames of Speex data, padded to an
            integral number of bytes (octets) as specified in RFC 5574. In
            other words, each RTP header is replaced with a single byte
            containing the block length. Only Speex wideband is supported.
            ``sample_rate_hertz`` must be 16000.
        """

        # Lossless encodings (and the "unset" sentinel).
        ENCODING_UNSPECIFIED = 0
        LINEAR16 = 1
        FLAC = 2

        # Lossy encodings.
        MULAW = 3
        AMR = 4
        AMR_WB = 5
        OGG_OPUS = 6
        SPEEX_WITH_HEADER_BYTE = 7
68+
69+
70+
class StreamingRecognizeResponse(object):
    # Container for the enum wrappers that belong to
    # ``StreamingRecognizeResponse``.

    class SpeechEventType(object):
        """Type of speech event reported in a streaming response.

        Attributes:
          SPEECH_EVENT_UNSPECIFIED (int): No speech event specified.
          END_OF_SINGLE_UTTERANCE (int): The server has detected the end of
            the user's speech utterance and expects no additional speech, so
            it will not process additional audio (although it may
            subsequently return additional results). The client should stop
            sending additional audio data, half-close the gRPC connection,
            and wait for any additional results until the server closes the
            gRPC connection. This event is only sent if ``single_utterance``
            was set to ``true``, and is not used otherwise.
        """

        SPEECH_EVENT_UNSPECIFIED = 0
        END_OF_SINGLE_UTTERANCE = 1
Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
# Copyright 2017, Google Inc. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# EDITING INSTRUCTIONS
16+
# This file was generated from the file
17+
# https://github.com/google/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto,
18+
# and updates to that file get reflected here through a refresh process.
19+
# For the short term, the refresh process will only be runnable by Google engineers.
20+
#
21+
# The only allowed edits are to method and file documentation. A 3-way
22+
# merge preserves those additions if the generated source changes.
23+
"""Accesses the google.cloud.speech.v1 Speech API."""
24+
25+
import collections
26+
import json
27+
import os
28+
import pkg_resources
29+
import platform
30+
31+
from google.gapic.longrunning import operations_client
32+
from google.gax import api_callable
33+
from google.gax import config
34+
from google.gax import path_template
35+
from google.gax.utils import oneof
36+
import google.gax
37+
38+
from google.cloud.gapic.speech.v1 import enums
39+
from google.cloud.proto.speech.v1 import cloud_speech_pb2
40+
41+
42+
class SpeechClient(object):
    """Service that implements Google Cloud Speech API."""

    SERVICE_ADDRESS = 'speech.googleapis.com'
    """The default address of the service."""

    DEFAULT_SERVICE_PORT = 443
    """The default port of the service."""

    # OAuth2 scopes requested when the caller does not supply any; they cover
    # gRPC calls to every method defined in this service.
    _ALL_SCOPES = ('https://www.googleapis.com/auth/cloud-platform', )

    def __init__(self,
                 service_path=SERVICE_ADDRESS,
                 port=DEFAULT_SERVICE_PORT,
                 channel=None,
                 credentials=None,
                 ssl_credentials=None,
                 scopes=None,
                 client_config=None,
                 app_name=None,
                 app_version='',
                 lib_name=None,
                 lib_version='',
                 metrics_headers=()):
        """Create the gRPC stub, operations client and API callables.

        Args:
          service_path (string): The domain name of the API remote host.
          port (int): The port on which to connect to the remote host.
          channel (:class:`grpc.Channel`): A ``Channel`` instance through
            which to make calls.
          credentials (object): The authorization credentials to attach to
            requests. These credentials identify this application to the
            service.
          ssl_credentials (:class:`grpc.ChannelCredentials`): A
            ``ChannelCredentials`` instance for use with an SSL-enabled
            channel.
          scopes (list[string]): A list of OAuth2 scopes to attach to
            requests. When ``None``, ``_ALL_SCOPES`` is used.
          client_config (dict): A dictionary for call options for each
            method. See :func:`google.gax.construct_settings` for the
            structure of this data. Falls back to the default config if not
            specified or the specified config is missing data points.
          app_name (string): The name of the application calling the
            service. Not referenced by this constructor.
          app_version (string): The version of the application calling the
            service. Not referenced by this constructor.
          lib_name (string): The API library software used for calling the
            service. (Unless you are writing an API client itself, leave
            this as default.)
          lib_version (string): The API library software version used for
            calling the service. (Unless you are writing an API client
            itself, leave this as default.)
          metrics_headers (dict): A dictionary of values for tracking client
            library metrics. Ultimately serializes to a string
            (e.g. 'foo/1.2.3 bar/3.14.1'). This argument should be
            considered private.
        """
        # Fall back to the full scope set / an empty config when unset.
        scopes = self._ALL_SCOPES if scopes is None else scopes
        client_config = {} if client_config is None else client_config

        # An ordered mapping keeps the serialized header string stable
        # (cuts down on cardinality of the resulting string slightly).
        headers = collections.OrderedDict(metrics_headers)
        headers['gl-python'] = platform.python_version()

        # Only newer client libraries pass a library name and version.
        if lib_name:
            headers[lib_name] = lib_version

        # Record the version of the installed GAPIC distribution last.
        speech_dist = pkg_resources.get_distribution('google-cloud-speech')
        headers['gapic'] = speech_dist.version

        # Merge the packaged default call settings with any overrides.
        packaged_config = pkg_resources.resource_string(
            __name__, 'speech_client_config.json')
        call_settings = api_callable.construct_settings(
            'google.cloud.speech.v1.Speech',
            json.loads(packaged_config.decode()),
            client_config,
            config.STATUS_CODE_NAMES,
            metrics_headers=headers)

        # Connection arguments shared by the stub and the operations client.
        transport = {
            'channel': channel,
            'credentials': credentials,
            'scopes': scopes,
            'ssl_credentials': ssl_credentials,
        }

        self.speech_stub = config.create_stub(
            cloud_speech_pb2.SpeechStub,
            service_path=service_path,
            service_port=port,
            **transport)

        self.operations_client = operations_client.OperationsClient(
            service_path=service_path,
            port=port,
            client_config=client_config,
            metrics_headers=headers,
            **transport)

        # Wrap each stub method with its configured call settings.
        stub = self.speech_stub
        self._recognize = api_callable.create_api_call(
            stub.Recognize, settings=call_settings['recognize'])
        self._long_running_recognize = api_callable.create_api_call(
            stub.LongRunningRecognize,
            settings=call_settings['long_running_recognize'])
        self._streaming_recognize = api_callable.create_api_call(
            stub.StreamingRecognize,
            settings=call_settings['streaming_recognize'])

    # Service calls
    def recognize(self, config, audio, options=None):
        """
        Performs synchronous speech recognition: receive results after all
        audio has been sent and processed.

        Example:
          >>> from google.cloud.gapic.speech.v1 import speech_client
          >>> from google.cloud.gapic.speech.v1 import enums
          >>> from google.cloud.proto.speech.v1 import cloud_speech_pb2
          >>> client = speech_client.SpeechClient()
          >>> encoding = enums.RecognitionConfig.AudioEncoding.FLAC
          >>> sample_rate_hertz = 44100
          >>> language_code = 'en-US'
          >>> config = cloud_speech_pb2.RecognitionConfig(
          ...     encoding=encoding,
          ...     sample_rate_hertz=sample_rate_hertz,
          ...     language_code=language_code)
          >>> uri = 'gs://bucket_name/file_name.flac'
          >>> audio = cloud_speech_pb2.RecognitionAudio(uri=uri)
          >>> response = client.recognize(config, audio)

        Args:
          config (:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognitionConfig`):
            *Required* Provides information to the recognizer that specifies
            how to process the request.
          audio (:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognitionAudio`):
            *Required* The audio data to be recognized.
          options (:class:`google.gax.CallOptions`): Overrides the default
            settings for this call, e.g., timeout, retries etc.

        Returns:
          A :class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognizeResponse` instance.

        Raises:
          :exc:`google.gax.errors.GaxError` if the RPC is aborted.
          :exc:`ValueError` if the parameters are invalid.
        """
        # Bundle the arguments into the request message and send it.
        fields = {'config': config, 'audio': audio}
        request = cloud_speech_pb2.RecognizeRequest(**fields)
        return self._recognize(request, options)

    def long_running_recognize(self, config, audio, options=None):
        """
        Performs asynchronous speech recognition: receive results via the
        google.longrunning.Operations interface. Returns either an
        ``Operation.error`` or an ``Operation.response`` which contains
        a ``LongRunningRecognizeResponse`` message.

        Example:
          >>> from google.cloud.gapic.speech.v1 import speech_client
          >>> from google.cloud.gapic.speech.v1 import enums
          >>> from google.cloud.proto.speech.v1 import cloud_speech_pb2
          >>> client = speech_client.SpeechClient()
          >>> encoding = enums.RecognitionConfig.AudioEncoding.FLAC
          >>> sample_rate_hertz = 44100
          >>> language_code = 'en-US'
          >>> config = cloud_speech_pb2.RecognitionConfig(
          ...     encoding=encoding,
          ...     sample_rate_hertz=sample_rate_hertz,
          ...     language_code=language_code)
          >>> uri = 'gs://bucket_name/file_name.flac'
          >>> audio = cloud_speech_pb2.RecognitionAudio(uri=uri)
          >>> response = client.long_running_recognize(config, audio)
          >>>
          >>> def callback(operation_future):
          ...     # Handle result.
          ...     result = operation_future.result()
          >>>
          >>> response.add_done_callback(callback)
          >>>
          >>> # Handle metadata.
          >>> metadata = response.metadata()

        Args:
          config (:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognitionConfig`):
            *Required* Provides information to the recognizer that specifies
            how to process the request.
          audio (:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.RecognitionAudio`):
            *Required* The audio data to be recognized.
          options (:class:`google.gax.CallOptions`): Overrides the default
            settings for this call, e.g., timeout, retries etc.

        Returns:
          A :class:`google.gax._OperationFuture` instance.

        Raises:
          :exc:`google.gax.errors.GaxError` if the RPC is aborted.
          :exc:`ValueError` if the parameters are invalid.
        """
        # Start the operation, then wrap it in a future that is handed the
        # operations client and the response/metadata message types.
        request = cloud_speech_pb2.LongRunningRecognizeRequest(
            config=config, audio=audio)
        operation = self._long_running_recognize(request, options)
        return google.gax._OperationFuture(
            operation,
            self.operations_client,
            cloud_speech_pb2.LongRunningRecognizeResponse,
            cloud_speech_pb2.LongRunningRecognizeMetadata,
            options)

    def streaming_recognize(self, requests, options=None):
        """
        Performs bidirectional streaming speech recognition: receive results
        while sending audio. This method is only available via the gRPC API
        (not REST).

        EXPERIMENTAL: This method interface might change in the future.

        Example:
          >>> from google.cloud.gapic.speech.v1 import speech_client
          >>> from google.cloud.proto.speech.v1 import cloud_speech_pb2
          >>> client = speech_client.SpeechClient()
          >>> request = cloud_speech_pb2.StreamingRecognizeRequest()
          >>> requests = [request]
          >>> for element in client.streaming_recognize(requests):
          ...     # process element
          ...     pass

        Args:
          requests (iterator[:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.StreamingRecognizeRequest`]):
            The input objects.
          options (:class:`google.gax.CallOptions`): Overrides the default
            settings for this call, e.g., timeout, retries etc.

        Returns:
          iterator[:class:`google.cloud.proto.speech.v1.cloud_speech_pb2.StreamingRecognizeResponse`].

        Raises:
          :exc:`google.gax.errors.GaxError` if the RPC is aborted.
          :exc:`ValueError` if the parameters are invalid.
        """
        # The request iterator is passed straight to the wrapped stub call.
        streaming_call = self._streaming_recognize
        return streaming_call(requests, options)

0 commit comments

Comments
 (0)