Skip to content

Commit 3d614cd

Browse files
authored
Merge pull request #44 from NavodPeiris/dev
added missing cuda support for custom and hf models
2 parents 4085aa7 + eaf195b commit 3d614cd

3 files changed

Lines changed: 12 additions & 7 deletions

File tree

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="speechlib",
8-
version="1.1.4",
8+
version="1.1.5",
99
description="speechlib is a library that can do speaker diarization, transcription and speaker recognition on an audio file to create transcripts with actual speaker names. This library also contain audio preprocessor functions.",
1010
packages=find_packages(),
1111
long_description=long_description,

setup_instruction.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ for publishing:
99
pip install twine
1010

1111
to install locally for testing:
12-
pip install dist/speechlib-1.1.4-py3-none-any.whl
12+
pip install dist/speechlib-1.1.5-py3-none-any.whl
1313

1414
finally run:
1515
twine upload dist/*

speechlib/transcribe.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,11 @@ def transcribe(file, language, model_size, model_type, quantization, custom_mode
5353
print("model fodler: ", model_folder)
5454
try:
5555
if torch.cuda.is_available():
56-
model = whisper.load_model(custom_model_path, download_root=model_folder)
56+
model = whisper.load_model(custom_model_path, download_root=model_folder, device="cuda")
5757
result = model.transcribe(file, language=language, fp16=True)
5858
res = result["text"]
5959
else:
60-
model = whisper.load_model(custom_model_path, download_root=model_folder)
60+
model = whisper.load_model(custom_model_path, download_root=model_folder, device="cpu")
6161
result = model.transcribe(file, language=language, fp16=False)
6262
res = result["text"]
6363

@@ -66,9 +66,14 @@ def transcribe(file, language, model_size, model_type, quantization, custom_mode
6666
raise Exception(f"an error occured while transcribing: {err}")
6767
elif model_type == "huggingface":
6868
try:
69-
pipe = pipeline("automatic-speech-recognition", model=hf_model_path)
70-
result = pipe(file)
71-
res = result['text']
69+
if torch.cuda.is_available():
70+
pipe = pipeline("automatic-speech-recognition", model=hf_model_path, device="cuda")
71+
result = pipe(file)
72+
res = result['text']
73+
else:
74+
pipe = pipeline("automatic-speech-recognition", model=hf_model_path, device="cpu")
75+
result = pipe(file)
76+
res = result['text']
7277
return res
7378
except Exception as err:
7479
raise Exception(f"an error occured while transcribing: {err}")

0 commit comments

Comments
 (0)