diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 5dff1fb6679..29a6ace5058 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -62,6 +62,7 @@
 /language/**/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers
 /logging/**/* @GoogleCloudPlatform/dee-observability @GoogleCloudPlatform/python-samples-reviewers
 /media_cdn/**/* @justin-mp @msampathkumar @GoogleCloudPlatform/python-samples-reviewers
+/media-translation/**/* @GoogleCloudPlatform/dee-data-ai @GoogleCloudPlatform/python-samples-reviewers
 /memorystore/**/* @GoogleCloudPlatform/python-samples-reviewers
 /ml_engine/**/* @ivanmkc @GoogleCloudPlatform/python-samples-reviewers
 /monitoring/**/* @GoogleCloudPlatform/dee-observability @GoogleCloudPlatform/python-samples-reviewers
diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml
index e0147969d9a..6ce568b9e6e 100644
--- a/.github/blunderbuss.yml
+++ b/.github/blunderbuss.yml
@@ -213,6 +213,7 @@ assign_prs_by:
   - 'api: enterpriseknowledgegraph'
   - 'api: documentai'
   - 'api: retail'
+  - 'api: mediatranslation'
   to:
   - GoogleCloudPlatform/dee-data-ai
 - labels:
diff --git a/media-translation/README.md b/media-translation/README.md
deleted file mode 100644
index ca5f8c95e8a..00000000000
--- a/media-translation/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-These samples have been moved.
-
-https://github.com/googleapis/python-media-translation/tree/main/samples/snippets
diff --git a/media-translation/snippets/noxfile_config.py b/media-translation/snippets/noxfile_config.py
new file mode 100644
index 00000000000..4622f761678
--- /dev/null
+++ b/media-translation/snippets/noxfile_config.py
@@ -0,0 +1,42 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default TEST_CONFIG_OVERRIDE for python repos.
+
+# You can copy this file into your directory, then it will be imported from
+# the noxfile.py.
+
+# The source of truth:
+# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py
+
+TEST_CONFIG_OVERRIDE = {
+    # You can opt out from the test for specific Python versions.
+    "ignored_versions": ["2.7", "3.6"],
+    # Old samples are opted out of enforcing Python type hints
+    # All new samples should feature them
+    "enforce_type_hints": False,
+    # An envvar key for determining the project id to use. Change it
+    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+    # build specific Cloud project. You can also use your own string
+    # to use your own Cloud project.
+    "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+    # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+    # If you need to use a specific version of pip,
+    # change pip_version_override to the string representation
+    # of the version number, for example, "20.2.4"
+    "pip_version_override": None,
+    # A dictionary you want to inject into your test. Don't put any
+    # secrets here. These values will override predefined values.
+    "envs": {},
+}
diff --git a/media-translation/snippets/requirements-test.txt b/media-translation/snippets/requirements-test.txt
new file mode 100644
index 00000000000..c021c5b5b70
--- /dev/null
+++ b/media-translation/snippets/requirements-test.txt
@@ -0,0 +1 @@
+pytest==7.2.2
diff --git a/media-translation/snippets/requirements.txt b/media-translation/snippets/requirements.txt
new file mode 100644
index 00000000000..494803bd3c2
--- /dev/null
+++ b/media-translation/snippets/requirements.txt
@@ -0,0 +1,3 @@
+google-cloud-media-translation==0.11.2
+pyaudio==0.2.13
+six==1.16.0
\ No newline at end of file
diff --git a/media-translation/snippets/resources/audio.raw b/media-translation/snippets/resources/audio.raw
new file mode 100644
index 00000000000..5ebf79d3c9c
Binary files /dev/null and b/media-translation/snippets/resources/audio.raw differ
diff --git a/media-translation/snippets/translate_from_file.py b/media-translation/snippets/translate_from_file.py
new file mode 100644
index 00000000000..3e746b2be1e
--- /dev/null
+++ b/media-translation/snippets/translate_from_file.py
@@ -0,0 +1,71 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Cloud Media Translation sample application.
+
+Example usage:
+    python translate_from_file.py resources/audio.raw
+"""
+
+# [START mediatranslation_translate_from_file]
+from google.cloud import mediatranslation
+
+
+def translate_from_file(file_path="path/to/your/file"):
+    client = mediatranslation.SpeechTranslationServiceClient()
+
+    # The `sample_rate_hertz` field is not required for FLAC and WAV (Linear16)
+    # encoded data. Other audio encodings must provide the sampling rate.
+    audio_config = mediatranslation.TranslateSpeechConfig(
+        audio_encoding="linear16",
+        source_language_code="en-US",
+        target_language_code="fr-FR",
+    )
+
+    streaming_config = mediatranslation.StreamingTranslateSpeechConfig(
+        audio_config=audio_config, single_utterance=True
+    )
+
+    def request_generator(config, audio_file_path):
+
+        # The first request contains the configuration.
+        # Note that audio_content is explicitly set to None.
+        yield mediatranslation.StreamingTranslateSpeechRequest(streaming_config=config)
+
+        with open(audio_file_path, "rb") as audio:
+            while True:
+                chunk = audio.read(4096)
+                if not chunk:
+                    break
+                yield mediatranslation.StreamingTranslateSpeechRequest(
+                    audio_content=chunk
+                )
+
+    requests = request_generator(streaming_config, file_path)
+    responses = client.streaming_translate_speech(requests)
+
+    for response in responses:
+        # Once the transcription settles, the response contains the
+        # is_final result. The other results will be for subsequent portions of
+        # the audio.
+        print(f"Response: {response}")
+        result = response.result
+        translation = result.text_translation_result.translation
+
+        if result.text_translation_result.is_final:
+            print("\nFinal translation: {0}".format(translation))
+            break
+
+        print("\nPartial translation: {0}".format(translation))
+    # [END mediatranslation_translate_from_file]
diff --git a/media-translation/snippets/translate_from_file_test.py b/media-translation/snippets/translate_from_file_test.py
new file mode 100644
index 00000000000..0abbe97df1b
--- /dev/null
+++ b/media-translation/snippets/translate_from_file_test.py
@@ -0,0 +1,27 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+
+import translate_from_file
+
+RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
+
+
+def test_translate_streaming(capsys):
+    translate_from_file.translate_from_file(os.path.join(RESOURCES, "audio.raw"))
+    out, err = capsys.readouterr()
+
+    assert re.search(r"Partial translation", out, re.DOTALL | re.I)
diff --git a/media-translation/snippets/translate_from_mic.py b/media-translation/snippets/translate_from_mic.py
new file mode 100644
index 00000000000..a05d4292a51
--- /dev/null
+++ b/media-translation/snippets/translate_from_mic.py
@@ -0,0 +1,175 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Cloud Media Translation API sample application using a microphone.
+
+Example usage:
+    python translate_from_mic.py
+"""
+
+# [START mediatranslation_translate_from_mic]
+from __future__ import division
+
+import itertools
+
+from google.cloud import mediatranslation as media
+import pyaudio
+from six.moves import queue
+
+# Audio recording parameters
+RATE = 16000
+CHUNK = int(RATE / 10)  # 100ms
+SpeechEventType = media.StreamingTranslateSpeechResponse.SpeechEventType
+
+
+class MicrophoneStream:
+    """Opens a recording stream as a generator yielding the audio chunks."""
+
+    def __init__(self, rate, chunk):
+        self._rate = rate
+        self._chunk = chunk
+
+        # Create a thread-safe buffer of audio data
+        self._buff = queue.Queue()
+        self.closed = True
+
+    def __enter__(self):
+        self._audio_interface = pyaudio.PyAudio()
+        self._audio_stream = self._audio_interface.open(
+            format=pyaudio.paInt16,
+            channels=1,
+            rate=self._rate,
+            input=True,
+            frames_per_buffer=self._chunk,
+            # Run the audio stream asynchronously to fill the buffer object.
+            # This is necessary so that the input device's buffer doesn't
+            # overflow while the calling thread makes network requests, etc.
+            stream_callback=self._fill_buffer,
+        )
+
+        self.closed = False
+
+        return self
+
+    def __exit__(self, type=None, value=None, traceback=None):
+        self._audio_stream.stop_stream()
+        self._audio_stream.close()
+        self.closed = True
+        # Signal the generator to terminate so that the client's
+        # streaming_recognize method will not block the process termination.
+        self._buff.put(None)
+        self._audio_interface.terminate()
+
+    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
+        """Continuously collect data from the audio stream, into the buffer."""
+        self._buff.put(in_data)
+        return None, pyaudio.paContinue
+
+    def exit(self):
+        self.__exit__()
+
+    def generator(self):
+        while not self.closed:
+            # Use a blocking get() to ensure there's at least one chunk of
+            # data, and stop iteration if the chunk is None, indicating the
+            # end of the audio stream.
+            chunk = self._buff.get()
+            if chunk is None:
+                return
+            data = [chunk]
+
+            # Now consume whatever other data's still buffered.
+            while True:
+                try:
+                    chunk = self._buff.get(block=False)
+                    if chunk is None:
+                        return
+                    data.append(chunk)
+                except queue.Empty:
+                    break
+
+            yield b"".join(data)
+
+
+def listen_print_loop(responses):
+    """Iterates through server responses and prints them.
+
+    The responses passed is a generator that will block until a response
+    is provided by the server.
+    """
+    translation = ""
+    for response in responses:
+        # Once the transcription settles, the response contains the
+        # END_OF_SINGLE_UTTERANCE event.
+        if response.speech_event_type == SpeechEventType.END_OF_SINGLE_UTTERANCE:
+
+            print("\nFinal translation: {0}".format(translation))
+            return 0
+
+        result = response.result
+        translation = result.text_translation_result.translation
+
+        print("\nPartial translation: {0}".format(translation))
+
+
+def do_translation_loop():
+    print("Begin speaking...")
+
+    client = media.SpeechTranslationServiceClient()
+
+    speech_config = media.TranslateSpeechConfig(
+        audio_encoding="linear16",
+        source_language_code="en-US",
+        target_language_code="es-ES",
+    )
+
+    config = media.StreamingTranslateSpeechConfig(
+        audio_config=speech_config, single_utterance=True
+    )
+
+    # The first request contains the configuration.
+    # Note that audio_content is explicitly set to None.
+    first_request = media.StreamingTranslateSpeechRequest(streaming_config=config)
+
+    with MicrophoneStream(RATE, CHUNK) as stream:
+        audio_generator = stream.generator()
+        mic_requests = (
+            media.StreamingTranslateSpeechRequest(audio_content=content)
+            for content in audio_generator
+        )
+
+        requests = itertools.chain(iter([first_request]), mic_requests)
+
+        responses = client.streaming_translate_speech(requests)
+
+        # Print the translation responses as they arrive
+        result = listen_print_loop(responses)
+        if result == 0:
+            stream.exit()
+
+
+def main():
+    while True:
+        print()
+        option = input("Press any key to translate or 'q' to quit: ")
+
+        if option.lower() == "q":
+            break
+
+        do_translation_loop()
+
+
+if __name__ == "__main__":
+    main()
+# [END mediatranslation_translate_from_mic]