@@ -147,7 +147,8 @@ def start_server():
147147 augmentation_config = '{}' ,
148148 specgram_type = args .specgram_type ,
149149 num_threads = 1 ,
150- keep_transcription_text = True )
150+ keep_transcription_text = True ,
151+ num_conv_layers = args .num_conv_layers )
151152 # prepare ASR model
152153 ds2_model = DeepSpeech2Model (
153154 vocab_size = data_generator .vocab_size ,
@@ -163,8 +164,20 @@ def start_server():
163164 # prepare ASR inference handler
164165 def file_to_transcript (filename ):
165166 feature = data_generator .process_utterance (filename , "" )
167+ ins = []
168+ conv0_h = (feature [0 ].shape [0 ] - 1 ) // 2 + 1
169+ conv0_w = (feature [0 ].shape [1 ] - 1 ) // 3 + 1
170+ ins += [feature [0 ], feature [1 ],
171+ [0 ], [conv0_w ],
172+ [1 , 32 , 1 , conv0_h , conv0_w + 1 , conv0_w ]]
173+ pre_h = conv0_h
174+ for i in xrange (args .num_conv_layers - 1 ):
175+ h = (pre_h - 1 ) // 2 + 1
176+ pre_h = h
177+ ins += [[1 , 32 , 1 , h , conv0_w + 1 , conv0_w ]]
178+
166179 result_transcript = ds2_model .infer_batch (
167- infer_data = [feature ],
180+ infer_data = [ins ],
168181 decoding_method = args .decoding_method ,
169182 beam_alpha = args .alpha ,
170183 beam_beta = args .beta ,
@@ -173,7 +186,8 @@ def file_to_transcript(filename):
173186 cutoff_top_n = args .cutoff_top_n ,
174187 vocab_list = vocab_list ,
175188 language_model_path = args .lang_model_path ,
176- num_processes = 1 )
189+ num_processes = 1 ,
190+ feeding_dict = data_generator .feeding )
177191 return result_transcript [0 ]
178192
179193 # warming up with utterances sampled from Librispeech
0 commit comments