Skip to content

Commit ba6e549

Browse files
committed
Added on_recorded_chunk callback
1 parent e5613ca commit ba6e549

4 files changed

Lines changed: 31 additions & 17 deletions

File tree

RealtimeSTT/audio_recorder.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ def __init__(self,
125125
on_wakeword_timeout=None,
126126
on_wakeword_detection_start=None,
127127
on_wakeword_detection_end=None,
128+
on_recorded_chunk=None,
129+
debug_mode=False
128130
):
129131
"""
130132
Initializes an audio recorder and transcription
@@ -246,6 +248,11 @@ def __init__(self,
246248
- on_wakeword_detection_end (callable, default=None): Callback
247249
function to be called when the system stops listening for
248250
wake words (e.g. because of timeout or wake word detected)
251+
- on_recorded_chunk (callable, default=None): Callback function to be
252+
called when a chunk of audio is recorded. The function is called
253+
with the recorded audio chunk as its argument.
254+
- debug_mode (bool, default=False): If set to True, the system will
255+
print additional debug information to the console.
249256
250257
Raises:
251258
Exception: Errors related to initializing transcription
@@ -278,6 +285,7 @@ def __init__(self,
278285
self.on_vad_detect_stop = on_vad_detect_stop
279286
self.on_wakeword_detection_start = on_wakeword_detection_start
280287
self.on_wakeword_detection_end = on_wakeword_detection_end
288+
self.on_recorded_chunk = on_recorded_chunk
281289
self.on_transcription_start = on_transcription_start
282290
self.enable_realtime_transcription = enable_realtime_transcription
283291
self.realtime_model_type = realtime_model_type
@@ -288,6 +296,7 @@ def __init__(self,
288296
self.on_realtime_transcription_stabilized = (
289297
on_realtime_transcription_stabilized
290298
)
299+
self.debug_mode = debug_mode
291300
self.allowed_latency_limit = ALLOWED_LATENCY_LIMIT
292301

293302
self.level = level
@@ -578,9 +587,6 @@ def _transcription_worker(conn,
578587
transcription = " ".join(seg.text for seg in segments)
579588
transcription = transcription.strip()
580589
conn.send(('success', transcription))
581-
except faster_whisper.WhisperError as e:
582-
logging.error(f"Whisper transcription error: {e}")
583-
conn.send(('error', str(e)))
584590
except Exception as e:
585591
logging.error(f"General transcription error: {e}")
586592
conn.send(('error', str(e)))
@@ -633,13 +639,14 @@ def _audio_data_worker(audio_queue,
633639

634640
try:
635641
audio_interface = pyaudio.PyAudio()
636-
stream = audio_interface.open(rate=sample_rate,
637-
format=pyaudio.paInt16,
638-
channels=1,
639-
input=True,
640-
frames_per_buffer=buffer_size,
641-
input_device_index=input_device_index,
642-
)
642+
stream = audio_interface.open(
643+
rate=sample_rate,
644+
format=pyaudio.paInt16,
645+
channels=1,
646+
input=True,
647+
frames_per_buffer=buffer_size,
648+
input_device_index=input_device_index,
649+
)
643650

644651
except Exception as e:
645652
logging.exception("Error initializing pyaudio "
@@ -978,6 +985,8 @@ def _recording_worker(self):
978985
try:
979986

980987
data = self.audio_queue.get()
988+
if self.on_recorded_chunk:
989+
self.on_recorded_chunk(data)
981990

982991
# Handle queue overflow
983992
queue_overflow_logged = False
@@ -1326,10 +1335,20 @@ def _is_webrtc_speech(self, data, all_frames_must_be_true=False):
13261335
if self.webrtc_vad_model.is_speech(frame, self.sample_rate):
13271336
speech_frames += 1
13281337
if not all_frames_must_be_true:
1338+
if self.debug_mode:
1339+
print(f"Speech detected in frame {i + 1}"
1340+
f" of {num_frames}")
13291341
return True
13301342
if all_frames_must_be_true:
1343+
if self.debug_mode and speech_frames == num_frames:
1344+
print(f"Speech detected in {speech_frames} of "
1345+
f"{num_frames} frames")
1346+
elif self.debug_mode:
1347+
print(f"Speech not detected in all {num_frames} frames")
13311348
return speech_frames == num_frames
13321349
else:
1350+
if self.debug_mode:
1351+
print(f"Speech not detected in any of {num_frames} frames")
13331352
return False
13341353

13351354
def _check_voice_activity(self, data):

requirements-gpu.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
PyAudio==0.2.14
2-
faster-whisper==0.10.0
2+
faster-whisper==1.0.1
33
pvporcupine==1.9.5
44
webrtcvad==2.0.10
55
halo==0.0.31

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
PyAudio==0.2.14
2-
faster-whisper==0.10.0
2+
faster-whisper==1.0.1
33
pvporcupine==1.9.5
44
webrtcvad==2.0.10
55
halo==0.0.31

requirements_raw.txt

Lines changed: 0 additions & 5 deletions
This file was deleted.

0 commit comments

Comments
 (0)