77}
88
99
10- def do_stt ():
10+ def do_stt (audio , text_state = "" ):
1111 transcription = ""
1212 r = sr .Recognizer ()
13- with sr . Microphone () as source :
14- r . adjust_for_ambient_noise ( source , 0.2 )
15- audio = r . listen ( source )
13+
14+ # Convert to AudioData
15+ audio_data = sr . AudioData ( sample_rate = audio [ 0 ], frame_data = audio [ 1 ], sample_width = 4 )
1616
1717 try :
18- transcription = r .recognize_whisper (audio , language = "english" , model = "base.en" )
18+ transcription = r .recognize_whisper (audio_data , language = "english" , model = "base.en" )
1919 except sr .UnknownValueError :
2020 print ("Whisper could not understand audio" )
2121 except sr .RequestError as e :
2222 print ("Could not request results from Whisper" , e )
2323
2424 input_hijack .update ({"state" : True , "value" : [transcription , transcription ]})
25- return transcription
25+
26+ text_state += transcription + " "
27+ return text_state , text_state
2628
2729
2830def update_hijack (val ):
@@ -31,7 +33,12 @@ def update_hijack(val):
3133
3234
3335def ui ():
34- speech_button = gr .Button (value = "🎙️" )
35- output_transcription = gr .Textbox (label = "STT-Input" , placeholder = "Speech Preview. Click \" Generate\" to send" , interactive = True )
36- output_transcription .change (fn = update_hijack , inputs = [output_transcription ])
37- speech_button .click (do_stt , outputs = [output_transcription ])
36+ tr_state = gr .State (value = "" )
37+ output_transcription = gr .Textbox (label = "STT-Input" ,
38+ placeholder = "Speech Preview. Click \" Generate\" to send" ,
39+ interactive = True )
40+ output_transcription .change (fn = update_hijack , inputs = [output_transcription ], outputs = [tr_state ])
41+ with gr .Row ():
42+ audio = gr .Audio (source = "microphone" )
43+ transcribe_button = gr .Button (value = "Transcribe" )
44+ transcribe_button .click (do_stt , inputs = [audio , tr_state ], outputs = [output_transcription , tr_state ])
0 commit comments