Merge pull request #28 from pkuyym/fix-27

pkuyym · web-flow · commit f8c73f032b18 · 2017-11-21T15:33:08.000+08:00
Adapt demo_server.py to support padding removing.
diff --git a/deploy/demo_server.py b/deploy/demo_server.py
@@ -147,7 +147,8 @@ def start_server():
         augmentation_config='{}',
         specgram_type=args.specgram_type,
         num_threads=1,
-        keep_transcription_text=True)
+        keep_transcription_text=True,
+        num_conv_layers=args.num_conv_layers)
     # prepare ASR model
     ds2_model = DeepSpeech2Model(
         vocab_size=data_generator.vocab_size,
@@ -163,8 +164,20 @@ def start_server():
     # prepare ASR inference handler
     def file_to_transcript(filename):
         feature = data_generator.process_utterance(filename, "")
+        ins = []
+        conv0_h = (feature[0].shape[0] - 1) // 2 + 1
+        conv0_w = (feature[0].shape[1] - 1) // 3 + 1
+        ins += [feature[0], feature[1],
+                [0], [conv0_w],
+                [1, 32, 1, conv0_h, conv0_w + 1, conv0_w]]
+        pre_h = conv0_h
+        for i in xrange(args.num_conv_layers - 1):
+            h = (pre_h - 1) // 2 + 1
+            pre_h = h
+            ins += [[1, 32, 1, h, conv0_w + 1, conv0_w]]
+
         result_transcript = ds2_model.infer_batch(
-            infer_data=[feature],
+            infer_data=[ins],
             decoding_method=args.decoding_method,
             beam_alpha=args.alpha,
             beam_beta=args.beta,
@@ -173,7 +186,8 @@ def file_to_transcript(filename):
             cutoff_top_n=args.cutoff_top_n,
             vocab_list=vocab_list,
             language_model_path=args.lang_model_path,
-            num_processes=1)
+            num_processes=1,
+            feeding_dict=data_generator.feeding)
         return result_transcript[0]
 
     # warming up with utterrances sampled from Librispeech