Skip to content

Commit c4cb90f

Browse files
authored
Merge pull request #797 from ftnext/feature/groq-support
Support Groq whisper
2 parents 73c8cdf + 698cd76 commit c4cb90f

File tree

7 files changed

+139
-9
lines changed

7 files changed

+139
-9
lines changed

.github/workflows/unittests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,16 @@ jobs:
4444
- name: Install Python dependencies (Ubuntu, <=3.12)
4545
if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.13'
4646
run: |
47-
python -m pip install .[dev,audio,pocketsphinx,whisper-local,whisper-api]
47+
python -m pip install .[dev,audio,pocketsphinx,whisper-local,whisper-api,groq]
4848
- name: Install Python dependencies (Ubuntu, 3.13)
4949
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
5050
run: |
5151
python -m pip install standard-aifc setuptools
52-
python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,whisper-api]
52+
python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,whisper-api,groq]
5353
- name: Install Python dependencies (Windows)
5454
if: matrix.os == 'windows-latest'
5555
run: |
56-
python -m pip install .[dev,whisper-local,whisper-api]
56+
python -m pip install .[dev,whisper-local,whisper-api,groq]
5757
- name: Test with unittest
5858
run: |
5959
pytest --doctest-modules -v speech_recognition/recognizers/ tests/

README.rst

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ Speech recognition engine/API support:
3939
* `Tensorflow <https://www.tensorflow.org/>`__
4040
* `Vosk API <https://github.com/alphacep/vosk-api/>`__ (works offline)
4141
* `OpenAI whisper <https://github.com/openai/whisper>`__ (works offline)
42-
* `Whisper API <https://platform.openai.com/docs/guides/speech-to-text>`__
42+
* `OpenAI Whisper API <https://platform.openai.com/docs/guides/speech-to-text>`__
43+
* `Groq Whisper API <https://console.groq.com/docs/speech-text>`__
4344

4445
**Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details.
4546

@@ -96,7 +97,8 @@ To use all of the functionality of the library, you should have:
9697
* **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X)
9798
* **Vosk** (required only if you need to use Vosk API speech recognition ``recognizer_instance.recognize_vosk``)
9899
* **Whisper** (required only if you need to use Whisper ``recognizer_instance.recognize_whisper``)
99-
* **openai** (required only if you need to use Whisper API speech recognition ``recognizer_instance.recognize_whisper_api``)
100+
* **openai** (required only if you need to use OpenAI Whisper API speech recognition ``recognizer_instance.recognize_whisper_api``)
101+
* **groq** (required only if you need to use Groq Whisper API speech recognition ``recognizer_instance.recognize_groq``)
100102

101103
The following requirements are optional, but can improve or extend functionality in some situations:
102104

@@ -171,15 +173,24 @@ Whisper is **required if and only if you want to use whisper** (``recognizer_ins
171173

172174
You can install it with ``python3 -m pip install SpeechRecognition[whisper-local]``.
173175

174-
Whisper API (for Whisper API users)
175-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176+
OpenAI Whisper API (for OpenAI Whisper API users)
177+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
176178

177-
The library `openai <https://pypi.org/project/openai/>`__ is **required if and only if you want to use Whisper API** (``recognizer_instance.recognize_whisper_api``).
179+
The library `openai <https://pypi.org/project/openai/>`__ is **required if and only if you want to use OpenAI Whisper API** (``recognizer_instance.recognize_whisper_api``).
178180

179181
If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_whisper_api`` will raise a ``RequestError``.
180182

181183
You can install it with ``python3 -m pip install SpeechRecognition[whisper-api]``.
182184

185+
Groq Whisper API (for Groq Whisper API users)
186+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
187+
188+
The library `groq <https://pypi.org/project/groq/>`__ is **required if and only if you want to use Groq Whisper API** (``recognizer_instance.recognize_groq``).
189+
190+
If not installed, everything in the library will still work, except calling ``recognizer_instance.recognize_groq`` will raise a ``SetupError``.
191+
192+
You can install it with ``python3 -m pip install SpeechRecognition[groq]``.
193+
183194
Troubleshooting
184195
---------------
185196

setup.cfg

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ dev =
44
rstcheck
55
pytest
66
pytest-randomly
7+
respx
78
audio =
89
PyAudio >= 0.2.11
910
pocketsphinx =
@@ -13,5 +14,9 @@ whisper-local =
1314
soundfile
1415
whisper-api =
1516
openai
17+
httpx < 0.28
18+
groq =
19+
groq
20+
httpx < 0.28
1621
assemblyai =
1722
requests

speech_recognition/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1506,12 +1506,13 @@ def flush(self, *args, **kwargs):
15061506
# At this time, the dependencies are not yet installed, resulting in a ModuleNotFoundError.
15071507
# This is a workaround to resolve this issue
15081508
try:
1509-
from .recognizers import google, whisper
1509+
from .recognizers import google, groq, whisper
15101510
except (ModuleNotFoundError, ImportError):
15111511
pass
15121512
else:
15131513
Recognizer.recognize_google = google.recognize_legacy
15141514
Recognizer.recognize_whisper_api = whisper.recognize_whisper_api
1515+
Recognizer.recognize_groq = groq.recognize_groq
15151516

15161517

15171518
# ===============================
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from __future__ import annotations
2+
3+
import os
4+
from typing import Literal, TypedDict
5+
from typing_extensions import Unpack
6+
7+
from speech_recognition.audio import AudioData
8+
from speech_recognition.exceptions import SetupError
9+
from speech_recognition.recognizers.whisper_api import (
10+
OpenAICompatibleRecognizer,
11+
)
12+
13+
# Model names accepted by ``recognize_groq``.
# Reference: https://console.groq.com/docs/speech-text#supported-models
GroqModel = Literal[
    "whisper-large-v3-turbo",
    "whisper-large-v3",
    "distil-whisper-large-v3-en",
]
17+
18+
19+
class GroqOptionalParameters(TypedDict, total=False):
    """Groq speech transcription's optional parameters.

    Declared with ``total=False`` so that every key may be omitted: when
    this type is used as ``**kwargs: Unpack[GroqOptionalParameters]``, a
    total TypedDict would make type checkers require all four keyword
    arguments on every call.

    https://console.groq.com/docs/speech-text#transcription-endpoint-usage
    """

    prompt: str
    response_format: str
    temperature: float
    language: str
29+
30+
31+
def recognize_groq(
    recognizer,
    audio_data: "AudioData",
    *,
    model: GroqModel = "whisper-large-v3-turbo",
    **kwargs: Unpack[GroqOptionalParameters],
) -> str:
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Groq Whisper API.

    This function requires a Groq account; log in at https://console.groq.com/login, then generate an API key in the `API Keys <https://console.groq.com/keys>`__ menu and expose it through the ``GROQ_API_KEY`` environment variable.

    ``model`` selects the Groq Whisper model. Any additional keyword arguments (``prompt``, ``response_format``, ``temperature``, ``language``) are forwarded unchanged to the transcription request.

    ``recognizer`` is not used by this function; it is accepted as the first positional argument.

    Detail: https://console.groq.com/docs/speech-text

    Returns the transcription text.

    Raises a ``speech_recognition.exceptions.SetupError`` exception if there are any issues with the groq installation, or the environment variable is missing.
    """
    if os.environ.get("GROQ_API_KEY") is None:
        raise SetupError("Set environment variable ``GROQ_API_KEY``")

    # Imported lazily so that the optional ``groq`` dependency is only
    # needed when this function is actually called.
    try:
        import groq
    except ImportError:
        raise SetupError(
            "missing groq module: ensure that groq is set up correctly."
        )

    # Use a distinct local name instead of rebinding the ``recognizer``
    # parameter, and forward the optional parameters, which were
    # previously accepted but never sent to the API.
    groq_recognizer = OpenAICompatibleRecognizer(groq.Groq())
    return groq_recognizer.recognize(audio_data, model, **kwargs)
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from io import BytesIO
2+
3+
from speech_recognition.audio import AudioData
4+
5+
6+
class OpenAICompatibleRecognizer:
    """Transcribe audio via a client exposing ``audio.transcriptions.create``."""

    def __init__(self, client) -> None:
        # Only ``client.audio.transcriptions.create`` is called on it.
        self.client = client

    @staticmethod
    def _named_wav_stream(audio_data: "AudioData") -> BytesIO:
        """Wrap the WAV bytes of ``audio_data`` in a file-like object with a name."""
        stream = BytesIO(audio_data.get_wav_data())
        stream.name = "SpeechRecognition_audio.wav"
        return stream

    def recognize(self, audio_data: "AudioData", model: str, **kwargs) -> str:
        """Send ``audio_data`` as WAV to the client and return the transcript text.

        ``model`` and any extra keyword arguments are passed through to
        ``client.audio.transcriptions.create``.

        Raises ``ValueError`` if ``audio_data`` is not an ``AudioData``.
        """
        if not isinstance(audio_data, AudioData):
            raise ValueError(
                "``audio_data`` must be an ``AudioData`` instance"
            )

        create_transcription = self.client.audio.transcriptions.create
        response = create_transcription(
            file=self._named_wav_stream(audio_data), model=model, **kwargs
        )
        return response.text

tests/recognizers/test_groq.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from unittest.mock import MagicMock
2+
3+
import httpx
4+
import respx
5+
6+
from speech_recognition import AudioData, Recognizer
7+
from speech_recognition.recognizers import groq
8+
9+
10+
@respx.mock(assert_all_called=True, assert_all_mocked=True)
def test_transcribe_with_groq_whisper(respx_mock, monkeypatch):
    """``recognize_groq`` returns the ``text`` field of the mocked response."""
    monkeypatch.setenv("GROQ_API_KEY", "gsk_grok_api_key")

    # Mock the Groq transcription endpoint with a canned JSON payload.
    canned_response = httpx.Response(
        200,
        json={
            "text": "Transcription by Groq Whisper",
            "x_groq": {"id": "req_unique_id"},
        },
    )
    transcription_route = respx_mock.post(
        "https://api.groq.com/openai/v1/audio/transcriptions"
    )
    transcription_route.mock(return_value=canned_response)

    fake_audio = MagicMock(spec=AudioData)
    fake_audio.get_wav_data.return_value = b"audio_data"
    fake_recognizer = MagicMock(spec=Recognizer)

    transcription = groq.recognize_groq(
        fake_recognizer, fake_audio, model="whisper-large-v3"
    )

    assert transcription == "Transcription by Groq Whisper"

0 commit comments

Comments
 (0)