Skip to content

Commit 3f3e0be

Browse files
authored
Merge pull request #842 from ftnext/test-recognize-vosk
Add test recognize_vosk()
2 parents 203c73a + 898aef1 commit 3f3e0be

File tree

4 files changed

+76
-3
lines changed

4 files changed

+76
-3
lines changed

.github/workflows/unittests.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,18 @@ jobs:
4646
- name: Install Python dependencies (Ubuntu, <=3.12)
4747
if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.13'
4848
run: |
49-
python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq]
49+
python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq,vosk]
5050
- name: Install Python dependencies (Ubuntu, 3.13)
5151
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
5252
run: |
5353
python -m pip install standard-aifc setuptools
54-
python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,google-cloud,openai,groq]
54+
python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,google-cloud,openai,groq,vosk]
5555
- name: Install Python dependencies (Windows)
5656
if: matrix.os == 'windows-latest'
5757
run: |
58-
python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq]
58+
python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq,vosk]
59+
- name: Set up vosk model
60+
run: pipx run setup_vosk.py
5961
- name: Test with unittest
6062
run: |
6163
pytest --doctest-modules -v speech_recognition/recognizers/ tests/

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,5 @@ groq =
2323
httpx < 0.28
2424
assemblyai =
2525
requests
26+
vosk =
27+
vosk

setup_vosk.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# /// script
2+
# requires-python = ">=3.9"
3+
# dependencies = [
4+
# "requests",
5+
# "tqdm",
6+
# ]
7+
# ///
8+
import os
9+
import shutil
10+
import tempfile
11+
import zipfile
12+
13+
import requests
14+
from tqdm import tqdm
15+
16+
17+
def setup_vosk_model(
    model_url: str, model_dir: str, timeout: float = 60.0
) -> None:
    """Download a zipped Vosk model and install it at ``model_dir``.

    The archive is streamed into a temporary directory, unzipped there, and
    the folder named after the archive (file name without ``.zip``) is copied
    to ``model_dir``, replacing any directory that already exists there.

    Args:
        model_url: URL of the model ``.zip`` archive. The last path component
            is used as the download file name and, minus its extension, as
            the name of the folder expected inside the archive.
        model_dir: Destination directory for the extracted model.
        timeout: Seconds to wait for the server to connect / send data before
            giving up, so a stalled download cannot hang forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
        FileNotFoundError: If the archive does not contain the expected
            top-level folder.
    """
    model_filename = os.path.basename(model_url)
    model_name = os.path.splitext(model_filename)[0]

    print(f"Downloading model {model_filename} ...")
    with tempfile.TemporaryDirectory() as temp_dir:
        download_path = os.path.join(temp_dir, model_filename)

        # Using the response as a context manager releases the streamed
        # connection even when the download fails part-way through.
        with requests.get(
            model_url, stream=True, timeout=timeout
        ) as response:
            response.raise_for_status()
            total_size = int(response.headers.get("content-length", 0))

            # A missing content-length means "unknown size": pass None so
            # the progress bar does not pretend the total is zero bytes.
            with open(download_path, "wb") as f, tqdm(
                total=total_size or None, unit="B", unit_scale=True
            ) as pbar:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))

        print("Unzip model...")
        with zipfile.ZipFile(download_path, "r") as zip_ref:
            zip_ref.extractall(temp_dir)

        extracted_dir = os.path.join(temp_dir, model_name)
        # Validate before touching model_dir so that an unexpected archive
        # layout does not wipe out a previously installed model.
        if not os.path.isdir(extracted_dir):
            raise FileNotFoundError(
                f"Expected folder {model_name!r} was not found in "
                f"{model_filename}"
            )
        if os.path.exists(model_dir):
            shutil.rmtree(model_dir)
        shutil.copytree(extracted_dir, model_dir)

    print(f"Setup complete! Model is placed in the directory: {model_dir}")
45+
46+
47+
if __name__ == "__main__":
48+
model_url = (
49+
"https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"
50+
)
51+
setup_vosk_model(model_url, "model")

tests/recognizers/test_vosk.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from pathlib import Path
2+
3+
from speech_recognition import AudioData, Recognizer
4+
5+
6+
def test_recognize_vosk():
# Loads "english.wav" from the directory two levels above this test file's
# path component (i.e. the parent of this file's folder), runs
# Recognizer.recognize_vosk() on it, and compares the returned string with
# the literal below.
# NOTE(review): presumably recognize_vosk() needs the model that
# setup_vosk.py places in "model" -- confirm against the recognizer.
7+
audio_file = str(Path(__file__).parent.parent / "english.wav")
8+
audio_data = AudioData.from_file(audio_file)
9+
sut = Recognizer()
10+
11+
actual = sut.recognize_vosk(audio_data)
12+
13+
# NOTE(review): leading whitespace appears to be stripped in this view, so
# the exact indentation inside the expected literal cannot be confirmed
# here -- verify it against the file in the repository before editing.
expected = """\
14+
{
15+
"text" : "one two three"
16+
}\
17+
"""
18+
assert actual == expected

0 commit comments

Comments
 (0)