Merge pull request #842 from ftnext/test-recognize-vosk

ftnext · web-flow · commit 3f3e0beaf0f6 · 2025-05-17T13:08:39.000+09:00
Add test for recognize_vosk()
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
@@ -46,16 +46,18 @@ jobs:
       - name: Install Python dependencies (Ubuntu, <=3.12)
         if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.13'
         run: |
-          python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq]
+          python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq,vosk]
       - name: Install Python dependencies (Ubuntu, 3.13)
         if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
         run: |
           python -m pip install standard-aifc setuptools
-          python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,google-cloud,openai,groq]
+          python -m pip install --no-build-isolation .[dev,audio,pocketsphinx,google-cloud,openai,groq,vosk]
       - name: Install Python dependencies (Windows)
         if: matrix.os == 'windows-latest'
         run: |
-          python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq]
+          python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq,vosk]
+      - name: Set up vosk model
+        run: pipx run setup_vosk.py
       - name: Test with unittest
         run: |
           pytest --doctest-modules -v speech_recognition/recognizers/ tests/
diff --git a/setup.cfg b/setup.cfg
@@ -23,3 +23,5 @@ groq =
     httpx < 0.28
 assemblyai =
     requests
+vosk =
+    vosk
diff --git a/setup_vosk.py b/setup_vosk.py
@@ -0,0 +1,85 @@
+# /// script
+# requires-python = ">=3.9"
+# dependencies = [
+#     "requests",
+#     "tqdm",
+# ]
+# ///
+"""Download and unpack a Vosk model into a local directory.
+
+The inline script metadata above lists the third-party dependencies; the CI
+workflow runs this file with ``pipx run setup_vosk.py`` before the test step.
+"""
+import os
+import shutil
+import tempfile
+import zipfile
+
+import requests
+from tqdm import tqdm
+
+# (connect, read) timeouts in seconds; without them a stalled server would
+# block the download (and the CI job) indefinitely.
+_DOWNLOAD_TIMEOUT = (10, 60)
+
+
+def setup_vosk_model(model_url: str, model_dir: str) -> None:
+    """Download the zipped model at *model_url* and install it as *model_dir*.
+
+    The archive is fetched into a temporary directory, unzipped there, and the
+    extracted folder is copied to *model_dir*. The archive is expected to
+    contain a top-level folder named after the zip file (without ``.zip``).
+    Any existing *model_dir* is replaced.
+
+    Args:
+        model_url: URL of the model zip archive.
+        model_dir: Destination directory for the extracted model.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        requests.Timeout: If connecting or reading stalls past the timeout.
+        FileNotFoundError: If the archive lacks the expected top-level folder.
+    """
+    model_filename = os.path.basename(model_url)
+    model_name = os.path.splitext(model_filename)[0]
+
+    with tempfile.TemporaryDirectory() as temp_dir:
+        download_path = os.path.join(temp_dir, model_filename)
+
+        print(f"Downloading model {model_filename} ...")
+        # The context manager releases the streamed connection even on error.
+        with requests.get(
+            model_url, stream=True, timeout=_DOWNLOAD_TIMEOUT
+        ) as response:
+            response.raise_for_status()
+            total_size = int(response.headers.get("content-length", 0))
+            with open(download_path, "wb") as f:
+                with tqdm(total=total_size, unit="B", unit_scale=True) as pbar:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            f.write(chunk)
+                            pbar.update(len(chunk))
+
+        print("Unzip model...")
+        with zipfile.ZipFile(download_path, "r") as zip_ref:
+            zip_ref.extractall(temp_dir)
+
+        extracted_dir = os.path.join(temp_dir, model_name)
+        # Validate before deleting anything so a bad archive cannot wipe out a
+        # previously installed model.
+        if not os.path.isdir(extracted_dir):
+            raise FileNotFoundError(
+                f"Expected folder {model_name!r} not found in {model_filename}"
+            )
+        if os.path.exists(model_dir):
+            shutil.rmtree(model_dir)
+        shutil.copytree(extracted_dir, model_dir)
+
+    print(f"Setup complete! Model is placed in the directory: {model_dir}")
+
+
+if __name__ == "__main__":
+    model_url = (
+        "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"
+    )
+    setup_vosk_model(model_url, "model")
diff --git a/tests/recognizers/test_vosk.py b/tests/recognizers/test_vosk.py
@@ -0,0 +1,16 @@
+from pathlib import Path
+
+from speech_recognition import AudioData, Recognizer
+
+
+def test_recognize_vosk():
+    """recognize_vosk() returns the expected JSON text for english.wav."""
+    wav_path = Path(__file__).parent.parent / "english.wav"
+    audio = AudioData.from_file(str(wav_path))
+    recognizer = Recognizer()
+
+    actual = recognizer.recognize_vosk(audio)
+
+    # The result is compared verbatim, including the JSON whitespace.
+    expected = '{\n  "text" : "one two three"\n}'
+    assert actual == expected