Skip to content

Commit ced17db

Browse files
committed
chore: Build SSML on the fly to support pitch/rate adjustment
Implements the request made in #51
1 parent 8f7fe11 commit ced17db

1 file changed

Lines changed: 36 additions & 2 deletions

File tree

voice-gen.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import argparse
33
import csv
44
import os
5+
import re
56
import sys
67
import time
78
import subprocess
@@ -41,11 +42,30 @@ def init_argparse() -> argparse.ArgumentParser:
4142
help="Voice to use"
4243
)
4344

45+
parser.add_argument('-l',
46+
'--locale',
47+
type=str,
48+
help="Language locale",
49+
required=False,
50+
)
51+
4452
parser.add_argument('langdir',
4553
type=str,
4654
help="Language subfolder"
4755
)
4856

57+
parser.add_argument('-p',
58+
'--pitch',
59+
help="Pitch adjustment",
60+
type=str,
61+
default="default")
62+
63+
parser.add_argument('-r',
64+
'--rate',
65+
help="Rate adjustment",
66+
type=str,
67+
default="default")
68+
4969
parser.add_argument('-s',
5070
'--delay',
5171
type=int,
@@ -67,8 +87,15 @@ def main() -> None:
6787
langdir = args.langdir
6888
basedir = os.path.dirname(os.path.abspath(__file__))
6989
outdir = ""
90+
pitch = args.pitch
91+
rate = args.rate
7092
delay_time = args.delay
7193

94+
if args.locale is not None:
95+
locale = args.locale
96+
else:
97+
locale = re.split('([a-z]{2}-[A-Z]{2})', voice)[1]
98+
7299
try:
73100
speech_key = os.environ['COGNITIVE_SERVICE_API_KEY']
74101
service_region = os.environ['SERVICE_REGION']
@@ -132,12 +159,19 @@ def main() -> None:
132159
if not os.path.isfile(outfile):
133160
print(
134161
f'[{line_count}/{csv_rows}] Translate "{en_text}" to "{text}", save as "{outdir}{os.sep}{filename}".')
135-
speech_config.speech_synthesis_voice_name = voice
136162
audio_config = speechsdk.audio.AudioOutputConfig(
137163
filename=outfile)
138164
synthesizer = speechsdk.SpeechSynthesizer(
139165
speech_config=speech_config, audio_config=audio_config)
140-
result = synthesizer.speak_text_async(text).get()
166+
167+
ssml_text = f"""
168+
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="{locale}">
169+
<voice name="{voice}">
170+
<prosody pitch="{pitch}" rate="{rate}">{text}</prosody>
171+
</voice>
172+
</speak>"""
173+
174+
result = synthesizer.speak_ssml_async(ssml=ssml_text).get()
141175

142176
# If failed, show error, remove empty/corrupt file and halt
143177
if result.reason == speechsdk.ResultReason.Canceled:

0 commit comments

Comments
 (0)