22import argparse
33import csv
44import os
5+ import re
56import sys
67import time
78import subprocess
@@ -41,11 +42,30 @@ def init_argparse() -> argparse.ArgumentParser:
4142 help = "Voice to use"
4243 )
4344
45+ parser .add_argument ('-l' ,
46+ '--locale' ,
47+ type = str ,
48+ help = "Language locale" ,
49+ required = False ,
50+ )
51+
4452 parser .add_argument ('langdir' ,
4553 type = str ,
4654 help = "Language subfolder"
4755 )
4856
57+ parser .add_argument ('-p' ,
58+ '--pitch' ,
59+ help = "Pitch adjustment" ,
60+ type = str ,
61+ default = "default" )
62+
63+ parser .add_argument ('-r' ,
64+ '--rate' ,
65+ help = "Rate adjustment" ,
66+ type = str ,
67+ default = "default" )
68+
4969 parser .add_argument ('-s' ,
5070 '--delay' ,
5171 type = int ,
@@ -67,8 +87,15 @@ def main() -> None:
6787 langdir = args .langdir
6888 basedir = os .path .dirname (os .path .abspath (__file__ ))
6989 outdir = ""
90+ pitch = args .pitch
91+ rate = args .rate
7092 delay_time = args .delay
7193
94+ if args .locale is not None :
95+ locale = args .locale
96+ else :
97+ locale = re .split ('([a-z]{2}-[A-Z]{2})' , voice )[1 ]
98+
7299 try :
73100 speech_key = os .environ ['COGNITIVE_SERVICE_API_KEY' ]
74101 service_region = os .environ ['SERVICE_REGION' ]
@@ -132,12 +159,19 @@ def main() -> None:
132159 if not os .path .isfile (outfile ):
133160 print (
134161 f'[{ line_count } /{ csv_rows } ] Translate "{ en_text } " to "{ text } ", save as "{ outdir } { os .sep } { filename } ".' )
135- speech_config .speech_synthesis_voice_name = voice
136162 audio_config = speechsdk .audio .AudioOutputConfig (
137163 filename = outfile )
138164 synthesizer = speechsdk .SpeechSynthesizer (
139165 speech_config = speech_config , audio_config = audio_config )
140- result = synthesizer .speak_text_async (text ).get ()
166+
167+ ssml_text = f"""
168+ <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="{ locale } ">
169+ <voice name="{ voice } ">
170+ <prosody pitch="{ pitch } " rate="{ rate } ">{ text } </prosody>
171+ </voice>
172+ </speak>"""
173+
174+ result = synthesizer .speak_ssml_async (ssml = ssml_text ).get ()
141175
142176 # If failed, show error, remove empty/corrupt file and halt
143177 if result .reason == speechsdk .ResultReason .Canceled :
0 commit comments