From 1356cbed1bf499c952184453197a474fadbfd652 Mon Sep 17 00:00:00 2001 From: Nicolas Pavie Date: Mon, 31 Mar 2025 07:50:37 +0200 Subject: [PATCH 1/5] [modules] Add AWS polly adapter for TTS The original code was provided by ONCE. The adapter was then updated for the latest version of Pipeline and refactored to match the structure of other adapters (notably the Azure and Google ones). The adapter was also updated to allow for the use of generative, long-form, or neural voices if available for the user's AWS account. The main changes compared to original code from ONCE are: - Updated the service and engine classes to mimic Azure adapter. - Added the following configuration properties for connecting to AWS: - org.daisy.pipeline.tts.aws.accesskey - org.daisy.pipeline.tts.aws.secretkey - org.daisy.pipeline.tts.aws.region - Omit namespace prefixes from SSML as AWS seems to refuse it. - Cleaned up tests files --- modules/bom/pom.xml | 144 ++++++++++++++ modules/pom.xml | 1 + .../tts/tts-adapters/tts-adapter-aws/pom.xml | 73 +++++++ .../tts/aws/impl/AWSPollyTTSEngine.java | 183 ++++++++++++++++++ .../tts/aws/impl/AWSPollyTTSService.java | 91 +++++++++ .../src/main/resources/transform-ssml.xsl | 36 ++++ .../tts/aws/impl/AWSPollyTTSServiceTest.java | 92 +++++++++ .../src/test/resources/logback.xml | 17 ++ 8 files changed, 637 insertions(+) create mode 100644 modules/tts/tts-adapters/tts-adapter-aws/pom.xml create mode 100644 modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSEngine.java create mode 100644 modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java create mode 100644 modules/tts/tts-adapters/tts-adapter-aws/src/main/resources/transform-ssml.xsl create mode 100644 modules/tts/tts-adapters/tts-adapter-aws/src/test/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java create mode 100644 
modules/tts/tts-adapters/tts-adapter-aws/src/test/resources/logback.xml diff --git a/modules/bom/pom.xml b/modules/bom/pom.xml index 0e8e5ed495..12569fa3b8 100644 --- a/modules/bom/pom.xml +++ b/modules/bom/pom.xml @@ -23,6 +23,7 @@ 2.0.24 5.0.0 24.2.3 + 2.31.45 @@ -452,6 +453,11 @@ tts-adapter-acapela 3.1.7 + + org.daisy.pipeline.modules + tts-adapter-aws + 1.0.0-SNAPSHOT + org.daisy.pipeline.modules tts-adapter-azure @@ -712,6 +718,144 @@ client-sdk 1.37.0 + + + software.amazon.awssdk + polly + ${awssdk.version} + + + software.amazon.awssdk + sdk-core + ${awssdk.version} + + + software.amazon.awssdk + auth + ${awssdk.version} + + + software.amazon.awssdk + identity-spi + ${awssdk.version} + + + software.amazon.awssdk + http-auth + ${awssdk.version} + + + software.amazon.awssdk + http-auth-aws + ${awssdk.version} + + + software.amazon.awssdk + http-auth-spi + ${awssdk.version} + + + software.amazon.awssdk + http-client-spi + ${awssdk.version} + + + software.amazon.awssdk + apache-client + ${awssdk.version} + + + software.amazon.awssdk + aws-core + ${awssdk.version} + + + software.amazon.awssdk + utils + ${awssdk.version} + + + software.amazon.awssdk + regions + ${awssdk.version} + + + software.amazon.awssdk + aws-json-protocol + ${awssdk.version} + + + software.amazon.awssdk + retries + ${awssdk.version} + + + software.amazon.awssdk + retries-spi + ${awssdk.version} + + + software.amazon.awssdk + profiles + ${awssdk.version} + + + software.amazon.awssdk + endpoints-spi + ${awssdk.version} + + + software.amazon.awssdk + metrics-spi + ${awssdk.version} + + + software.amazon.awssdk + protocol-core + ${awssdk.version} + + + software.amazon.awssdk + aws-json-protocol + ${awssdk.version} + + + software.amazon.awssdk + json-utils + ${awssdk.version} + + + software.amazon.awssdk + third-party-jackson-core + ${awssdk.version} + + + software.amazon.awssdk + checksums + ${awssdk.version} + + + software.amazon.awssdk + checksums-spi + ${awssdk.version} + + + 
org.reactivestreams + reactive-streams + 1.0.4 + + + org.reactivestreams + reactive-streams-tck + 1.0.4 + + + org.reactivestreams + reactive-streams-tck-flow + 1.0.4 + diff --git a/modules/pom.xml b/modules/pom.xml index daf8bcee2e..5807736bcb 100644 --- a/modules/pom.xml +++ b/modules/pom.xml @@ -66,6 +66,7 @@ tts/tts-adapters/tts-adapter-google tts/tts-adapters/tts-adapter-cereproc tts/tts-adapters/tts-adapter-azure + tts/tts-adapters/tts-adapter-aws tts/tts-mocks tts/mathml-to-ssml braille/braille-common diff --git a/modules/tts/tts-adapters/tts-adapter-aws/pom.xml b/modules/tts/tts-adapters/tts-adapter-aws/pom.xml new file mode 100644 index 0000000000..7e3a6a0c38 --- /dev/null +++ b/modules/tts/tts-adapters/tts-adapter-aws/pom.xml @@ -0,0 +1,73 @@ + + + + 4.0.0 + + + org.daisy.pipeline.modules + modules-parent + 1.15.0 + ../../../parent + + + 1.0.0-SNAPSHOT + tts-adapter-aws + bundle + + DAISY Pipeline 2 module :: TTS Adapter for Amazon Polly text-to-speech service + Implementation of the TTS API for Amazon Polly text-to-speech service + + + + org.daisy.pipeline.modules + tts-common + + + org.daisy.libs + saxon-he + + + software.amazon.awssdk + polly + + + com.google.guava + guava + + + + + + org.daisy.pipeline.tts.aws.impl.AWSPollyTTSService + + + + + + + org.apache.felix + maven-bundle-plugin + + + + net.sf.saxon.*;version="${saxon.versionRange}", + !org.daisy.common.spi, + * + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ToBeSetForTests + ToBeSetForTests + ToBeSetForTests + + + + + + diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSEngine.java b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSEngine.java new file mode 100644 index 0000000000..50da2b5c77 --- /dev/null +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSEngine.java @@ -0,0 +1,183 @@ +package 
org.daisy.pipeline.tts.aws.impl; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import static java.util.stream.Collectors.toList; + +import javax.sound.sampled.AudioFormat; + +import com.google.common.io.ByteStreams; + +import net.sf.saxon.s9api.SaxonApiException; +import net.sf.saxon.s9api.XdmNode; + +import org.daisy.common.file.URLs; +import org.daisy.pipeline.tts.TTSEngine; +import org.daisy.pipeline.tts.TTSRegistry; +import org.daisy.pipeline.tts.TTSService.SynthesisException; +import org.daisy.pipeline.tts.Voice; +import org.daisy.pipeline.tts.VoiceInfo.Gender; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.core.exception.AbortedException; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.polly.PollyClient; +import software.amazon.awssdk.services.polly.model.DescribeVoicesResponse; +import software.amazon.awssdk.services.polly.model.SynthesizeSpeechRequest; +import software.amazon.awssdk.services.polly.model.Engine; +import software.amazon.awssdk.services.polly.model.TextType; +import software.amazon.awssdk.services.polly.model.VoiceId; +import software.amazon.awssdk.services.polly.model.OutputFormat; + +/** + * TTS adapter for Amazon Polly + * @author mmartida - original author + * @author Nicolas Pavie - updates and refactoring for DAISY Pipeline 2, version 1.15+ + */ +public class AWSPollyTTSEngine extends TTSEngine { + + private final static URL SSML_TRANSFORMER = URLs.getResourceFromJAR("/transform-ssml.xsl", AWSPollyTTSEngine.class); + + private final static Logger LOGGER = LoggerFactory.getLogger(AWSPollyTTSEngine.class); + + private static final AudioFormat AF_PCM_POLLY = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, 16000f, 16, 
1, 2, 16000f, false); + + /** + * Client object provided by AWS SDK to interact with the polly engines + */ + private final PollyClient client; + + /** + * Map of AWS voices available to the user for each engine + */ + private final HashMap awsVoicesDescription = new HashMap<>(); + + private int mPriority; + + /** + * Constructor for the AWSPollyTTSEngine + * + * @param service the bound service + * @param accessKey the AWS account access key + * @param secretKey the AWS account secret key + * @param region the AWS region + * @throws SynthesisException + */ + public AWSPollyTTSEngine(AWSPollyTTSService service, String accessKey, String secretKey, String region, int priority) + throws SynthesisException { + + super(service); + this.client = PollyClient.builder() + .credentialsProvider(() -> new AwsCredentials() { + @Override + public String accessKeyId() { + return accessKey; + } + + @Override + public String secretAccessKey() { + return secretKey; + } + }) + .region(Region.of(region)) + .build(); + this.mPriority = priority; + } + + + /** + * Get the list of available voices + * + * @throws SynthesisException + * @throws InterruptedException + */ + @Override + public Collection getAvailableVoices() throws SynthesisException, InterruptedException { + Collection voices = new ArrayList<>(); + if(awsVoicesDescription.isEmpty()){ + // fetch all voices for all engines + for (Engine engine : Engine.knownValues()) { + try { + DescribeVoicesResponse awsvoices = this.client.describeVoices(b -> b.engine(engine)); + awsVoicesDescription.put(engine, awsvoices); + } catch (Exception e) { + LOGGER.error("Error fetching AWS voices for engine {}: {}", engine, e.getMessage()); + } + } + } + for (DescribeVoicesResponse awsvoices : awsVoicesDescription.values()) { + voices.addAll( + awsvoices.voices() + .stream() + .map(i -> new Voice("polly", + i.name(), + new Locale(i.languageName()), + Gender.of(i.genderAsString().toLowerCase()))) + .collect(toList()) + ); + } + return voices; + } 
+ + @Override + public SynthesisResult synthesize(XdmNode pXdmNode, Voice pVoice, TTSRegistry.TTSResource pTTSResource) + throws SynthesisException, InterruptedException { + + String sentence; { + try { + Map params = new HashMap<>(); {} + sentence = transformSsmlNodeToString(pXdmNode, SSML_TRANSFORMER, params); + } catch (IOException | SaxonApiException e) { + throw new SynthesisException(e); + } + } + try { + SynthesizeSpeechRequest synthReq = SynthesizeSpeechRequest.builder() + .engine(Engine.STANDARD) + .text(sentence) + .textType(TextType.SSML) + .voiceId( + VoiceId.fromValue(pVoice.getName()) + ) + .outputFormat(OutputFormat.PCM) + .build(); + return new SynthesisResult( + createAudioStream( + AF_PCM_POLLY, new ByteArrayInputStream( + ByteStreams.toByteArray(this.client.synthesizeSpeech(synthReq)) + ) + ) + ); + } catch (IOException ex) { + throw new SynthesisException("Could not synthesize sentence \"" + sentence + "\"", ex); + } catch (AbortedException e) { + if (Thread.currentThread().isInterrupted()) { + InterruptedException ex = new InterruptedException(); + ex.initCause(e); + throw ex; + } else { + throw e; + } + } + } + + @Override + public int getOverallPriority() { + return mPriority; + } + + @Override + public int expectedMillisecPerWord() { + // Worst case scenario with quotas: + // the thread can wait for a bit more than a minute for a anwser + return 10000; + } +} diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java new file mode 100644 index 0000000000..121da096a9 --- /dev/null +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java @@ -0,0 +1,91 @@ +package org.daisy.pipeline.tts.aws.impl; + +import java.util.Map; +import java.util.Optional; + +import org.daisy.common.properties.Properties; +import 
org.daisy.common.properties.Properties.Property; +import org.daisy.pipeline.tts.TTSService; + +import org.osgi.service.component.annotations.Component; + +/** + * @author mmartida - original author + * @author Nicolas Pavie - update for pipeline-modules 1.15+ + */ +@Component( + name = "aws-tts-service", + service = { TTSService.class } +) +public class AWSPollyTTSService implements TTSService { + + private static final Property AWS_ACCESS_KEY = Properties.getProperty( + "org.daisy.pipeline.tts.aws.accesskey", + true, + "Access key for Amazon Polly speech engine", + true, + null + ); + + private static final Property AWS_SECRET_KEY = Properties.getProperty( + "org.daisy.pipeline.tts.aws.secretkey", + true, + "Secret key for Amazon Polly speech engine", + true, + null); + + private static final Property AWS_REGION = Properties.getProperty( + "org.daisy.pipeline.tts.aws.region", + true, + "Region for Amazon Polly speech engine", + true, + null); + + private static final Property AWS_PRIORITY = Properties.getProperty( + "org.daisy.pipeline.tts.aws.priority", + true, + "Priority of Amazon voices relative to voices of other engines", + false, + "2"); + + @Override + public AWSPollyTTSEngine newEngine(Map properties) + throws ServiceDisabledException, SynthesisException { + + String accessKey = AWS_ACCESS_KEY.getValue(properties); + if (accessKey == null || "".equals(accessKey)) + throw new ServiceDisabledException("Property not set: " + AWS_ACCESS_KEY.getName()); + String secretKey = AWS_SECRET_KEY.getValue(properties); + if (secretKey == null || "".equals(secretKey)) + throw new ServiceDisabledException("Property not set: " + AWS_SECRET_KEY.getName()); + String region = AWS_REGION.getValue(properties); + if (region == null || "".equals(region)) + throw new ServiceDisabledException("Property not set: " + AWS_REGION.getName()); + int priority = getPropertyAsInt(properties, AWS_PRIORITY).get(); + return new AWSPollyTTSEngine(this, accessKey, secretKey, region, priority); 
+ } + + private static Optional getPropertyAsInt(Map properties, Property prop) + throws SynthesisException { + + String str = prop.getValue(properties); + if (str != null) { + try { + return Optional.of(Integer.valueOf(str)); + } catch (NumberFormatException e) { + throw new SynthesisException(str + " is not a valid a value for property " + prop.getName()); + } + } + return Optional.empty(); + } + + @Override + public String getName() { + return "polly"; + } + + @Override + public String getDisplayName() { + return "Amazon"; + } +} diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/main/resources/transform-ssml.xsl b/modules/tts/tts-adapters/tts-adapter-aws/src/main/resources/transform-ssml.xsl new file mode 100644 index 0000000000..13037b05e8 --- /dev/null +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/main/resources/transform-ssml.xsl @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java b/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java new file mode 100644 index 0000000000..ca3bcbc821 --- /dev/null +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java @@ -0,0 +1,92 @@ +package org.daisy.pipeline.tts.aws.impl; + +import java.io.StringReader; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import javax.sound.sampled.AudioFormat; +import javax.sound.sampled.AudioInputStream; +import javax.xml.transform.sax.SAXSource; + +import net.sf.saxon.s9api.Processor; +import net.sf.saxon.s9api.SaxonApiException; +import net.sf.saxon.s9api.XdmNode; + +import org.daisy.pipeline.tts.TTSEngine; +import org.daisy.pipeline.tts.TTSRegistry.TTSResource; +import org.daisy.pipeline.tts.Voice; + +import org.xml.sax.InputSource; + +import org.junit.Assert; 
+import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; + +public class AWSPollyTTSServiceTest { + + @Before + public void checkConfig() { + String prop = "org.daisy.pipeline.tts.aws.accesskey"; + if (System.getProperty(prop) == null) { + System.out.println("No access key provided, please set the " + prop + " property"); + } + Assume.assumeTrue(System.getProperty(prop) != null); + prop = "org.daisy.pipeline.tts.aws.secretkey"; + if (System.getProperty(prop) == null) { + System.out.println("No secret key provided, please set the " + prop + " property"); + } + Assume.assumeTrue(System.getProperty(prop) != null); + prop = "org.daisy.pipeline.tts.aws.region"; + if (System.getProperty(prop) == null) { + System.out.println("No region provided, please set the " + prop + " property"); + } + Assume.assumeTrue(System.getProperty(prop) != null); + } + + private static AWSPollyTTSEngine allocateEngine() throws Throwable { + Map params = new HashMap<>(); { + params.put("org.daisy.pipeline.tts.aws.accesskey", System.getProperty("org.daisy.pipeline.tts.aws.accesskey")); + params.put("org.daisy.pipeline.tts.aws.secretkey", System.getProperty("org.daisy.pipeline.tts.aws.secretkey")); + params.put("org.daisy.pipeline.tts.aws.region", System.getProperty("org.daisy.pipeline.tts.aws.region")); + } + return new AWSPollyTTSService().newEngine(params); + } + + private static int getSize(AudioInputStream audio) { + return Math.toIntExact(audio.getFrameLength() * audio.getFormat().getFrameSize()); + } + + private static final Processor proc = new Processor(false); + private static XdmNode parseSSML(String ssml) throws SaxonApiException { + return proc.newDocumentBuilder().build(new SAXSource(new InputSource(new StringReader(ssml)))); + } + + @Test + public void testAvailableVoices() throws Throwable { + TTSEngine engine = allocateEngine(); + Collection voices = engine.getAvailableVoices(); + Assert.assertTrue(voices.size() > 0); + } + + @Test + public void testSpeak() 
throws Throwable { + TTSEngine engine = allocateEngine(); + TTSResource resource = engine.allocateThreadResources(); + try { + TTSEngine.SynthesisResult res = engine.synthesize( + // con este texto falla. Es lo que le llega desde la aplicación para probar el engine y está fallando. +// "small sentence", + // con esto funciona. Parece que no le gustan los namespaces. + parseSSML("García, que de humor iba justito cuando era el inocente, entró una vez a un trapo que no ayudó a la fluidez de su relación tempestuosa con Gil."), + new Voice("polly", "Lucia"), resource + ); + + AudioFormat format = res.audio.getFormat(); + Assert.assertTrue(getSize(res.audio) > 10000); + } finally { + engine.releaseThreadResources(resource); + } + } +} diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/test/resources/logback.xml b/modules/tts/tts-adapters/tts-adapter-aws/src/test/resources/logback.xml new file mode 100644 index 0000000000..825a2e6f58 --- /dev/null +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/test/resources/logback.xml @@ -0,0 +1,17 @@ + + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + \ No newline at end of file From 50b58602c08ea3c8e5fce8c49a27509ec4ccc214 Mon Sep 17 00:00:00 2001 From: Nicolas Pavie Date: Tue, 20 May 2025 11:00:53 +0200 Subject: [PATCH 2/5] [modules] Improve voices view and fix SSML - Change name of engine from "polly" to "aws" for mapping in UI project - Prioritize standard voices in the voices collection. This is to ensure a standard voice is used for the sanity check of the engine. - Add the engine (standard, neural, generative, long form) in the voice name to help identify which engine would be use (as it can greatly impact costs for the user). - Remove IDs in SSML as AWS rejects SSML with IDs. 
--- .../tts/aws/impl/AWSPollyTTSEngine.java | 78 ++++++++++++------- .../tts/aws/impl/AWSPollyTTSService.java | 2 +- .../src/main/resources/transform-ssml.xsl | 4 + .../tts/aws/impl/AWSPollyTTSServiceTest.java | 7 +- 4 files changed, 59 insertions(+), 32 deletions(-) diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSEngine.java b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSEngine.java index 50da2b5c77..68ce56e849 100644 --- a/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSEngine.java +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSEngine.java @@ -8,6 +8,8 @@ import java.util.HashMap; import java.util.Locale; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static java.util.stream.Collectors.toList; import javax.sound.sampled.AudioFormat; @@ -92,7 +94,6 @@ public String secretAccessKey() { this.mPriority = priority; } - /** * Get the list of available voices * @@ -101,32 +102,47 @@ public String secretAccessKey() { */ @Override public Collection getAvailableVoices() throws SynthesisException, InterruptedException { - Collection voices = new ArrayList<>(); - if(awsVoicesDescription.isEmpty()){ - // fetch all voices for all engines - for (Engine engine : Engine.knownValues()) { - try { - DescribeVoicesResponse awsvoices = this.client.describeVoices(b -> b.engine(engine)); - awsVoicesDescription.put(engine, awsvoices); - } catch (Exception e) { - LOGGER.error("Error fetching AWS voices for engine {}: {}", engine, e.getMessage()); + synchronized (this.client){ + // Order of priorities based on + Engine[] engines = new Engine[] { + Engine.STANDARD, + Engine.NEURAL, + Engine.GENERATIVE, + Engine.LONG_FORM, + }; + Collection voices = new ArrayList<>(); + if (awsVoicesDescription.isEmpty()) { + // fetch all 
voices for all engines + for (Engine engine : engines) { + try { + DescribeVoicesResponse awsvoices = this.client.describeVoices(b -> b.engine(engine)); + awsVoicesDescription.put(engine, awsvoices); + } catch (Exception e) { + LOGGER.error("Error fetching AWS voices for engine {}: {}", engine, e.getMessage()); + } } } + for (Engine engine : engines) { + if (awsVoicesDescription.containsKey(engine)) { + voices.addAll( + awsVoicesDescription.get(engine).voices() + .stream() + .map(i -> new Voice("aws", + i.idAsString() + " (" + engine.toString() + ")", + new Locale(i.languageName()), + Gender.of(i.genderAsString().toLowerCase()))) + .collect(toList()) + ); + } + } + return voices; } - for (DescribeVoicesResponse awsvoices : awsVoicesDescription.values()) { - voices.addAll( - awsvoices.voices() - .stream() - .map(i -> new Voice("polly", - i.name(), - new Locale(i.languageName()), - Gender.of(i.genderAsString().toLowerCase()))) - .collect(toList()) - ); - } - return voices; } + private Pattern voiceIdAndEnginePattern = Pattern.compile( + "([a-z0-9_]+)\\s*\\((standard|neural|generative|long-form)\\)", + Pattern.CASE_INSENSITIVE); + @Override public SynthesisResult synthesize(XdmNode pXdmNode, Voice pVoice, TTSRegistry.TTSResource pTTSResource) throws SynthesisException, InterruptedException { @@ -140,13 +156,23 @@ public SynthesisResult synthesize(XdmNode pXdmNode, Voice pVoice, TTSRegistry.TT } } try { + // Search back engine and voice from voiceId + Matcher m = voiceIdAndEnginePattern.matcher(pVoice.getName()); + if (!m.matches()) throw new SynthesisException( + "AWS VoiceId and engine not found from voice name: " + pVoice.getName()); + VoiceId selectedVoice = VoiceId.fromValue(m.group(1)); + Engine selectedEngine = Engine.fromValue(m.group(2)); + if (selectedVoice == null) { + throw new SynthesisException("Unknown Voice provided in : " + pVoice.getName()); + } + if (selectedEngine == null) { + throw new SynthesisException("Unknown Engine provided in " + 
pVoice.getName()); + } SynthesizeSpeechRequest synthReq = SynthesizeSpeechRequest.builder() - .engine(Engine.STANDARD) + .engine(selectedEngine) .text(sentence) .textType(TextType.SSML) - .voiceId( - VoiceId.fromValue(pVoice.getName()) - ) + .voiceId(selectedVoice) .outputFormat(OutputFormat.PCM) .build(); return new SynthesisResult( diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java index 121da096a9..bf656b863e 100644 --- a/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java @@ -81,7 +81,7 @@ private static Optional getPropertyAsInt(Map properties, @Override public String getName() { - return "polly"; + return "aws"; } @Override diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/main/resources/transform-ssml.xsl b/modules/tts/tts-adapters/tts-adapter-aws/src/main/resources/transform-ssml.xsl index 13037b05e8..edfb84c824 100644 --- a/modules/tts/tts-adapters/tts-adapter-aws/src/main/resources/transform-ssml.xsl +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/main/resources/transform-ssml.xsl @@ -8,6 +8,10 @@ + + + + diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java b/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java index ca3bcbc821..a5d753d436 100644 --- a/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java @@ -76,11 +76,8 @@ public void testSpeak() throws Throwable { 
TTSResource resource = engine.allocateThreadResources(); try { TTSEngine.SynthesisResult res = engine.synthesize( - // con este texto falla. Es lo que le llega desde la aplicación para probar el engine y está fallando. -// "small sentence", - // con esto funciona. Parece que no le gustan los namespaces. - parseSSML("García, que de humor iba justito cuando era el inocente, entró una vez a un trapo que no ayudó a la fluidez de su relación tempestuosa con Gil."), - new Voice("polly", "Lucia"), resource + parseSSML("García, que de humor iba justito cuando era el inocente, entró una vez a un trapo que no ayudó a la fluidez de su relación tempestuosa con Gil."), + new Voice("aws", "Lucia (standard)"), resource ); AudioFormat format = res.audio.getFormat(); From 134b8f78529d03d650ad639eb63b7658e7b6193e Mon Sep 17 00:00:00 2001 From: Nicolas Pavie Date: Tue, 20 May 2025 11:41:06 +0200 Subject: [PATCH 3/5] [modules] Change default priority to 15 to align with Azure and Google adapters. --- .../org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java index bf656b863e..f509f5697e 100644 --- a/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/main/java/org/daisy/pipeline/tts/aws/impl/AWSPollyTTSService.java @@ -46,7 +46,7 @@ public class AWSPollyTTSService implements TTSService { true, "Priority of Amazon voices relative to voices of other engines", false, - "2"); + "15"); @Override public AWSPollyTTSEngine newEngine(Map properties) From e17da9ff9b7353b8c4f3220390ae57957e57a510 Mon Sep 17 00:00:00 2001 From: Nicolas Pavie Date: Tue, 20 May 2025 11:41:34 +0200 Subject: [PATCH 4/5] 
[modules] Ignore tests requiring AWS authentication by default --- modules/tts/tts-adapters/tts-adapter-aws/src/test/java/ignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 modules/tts/tts-adapters/tts-adapter-aws/src/test/java/ignore diff --git a/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/ignore b/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/ignore new file mode 100644 index 0000000000..54dee5fd7b --- /dev/null +++ b/modules/tts/tts-adapters/tts-adapter-aws/src/test/java/ignore @@ -0,0 +1 @@ +org/daisy/pipeline/tts/aws/impl/AWSPollyTTSServiceTest.java From 980dc51295fe25005b4e9ae50107a59b3ff1044d Mon Sep 17 00:00:00 2001 From: Nicolas Pavie Date: Tue, 20 May 2025 12:05:13 +0200 Subject: [PATCH 5/5] [modules] Add README --- .../tts-adapters/tts-adapter-aws/README.md | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 modules/tts/tts-adapters/tts-adapter-aws/README.md diff --git a/modules/tts/tts-adapters/tts-adapter-aws/README.md b/modules/tts/tts-adapters/tts-adapter-aws/README.md new file mode 100644 index 0000000000..1a2c4a8692 --- /dev/null +++ b/modules/tts/tts-adapters/tts-adapter-aws/README.md @@ -0,0 +1,28 @@ +# AWS Polly Text-To-Speech Adapter + +This adapter provides a way to use the Amazon Polly text-to-speech service of Amazon Web Services (AWS) in the pipeline. It is based on the [AWS SDK for Java](https://aws.amazon.com/sdk-for-java/) and the [AWS Polly API](https://docs.aws.amazon.com/polly/latest/dg/what-is.html). + +## New properties + +The following properties are used by this adapter: + +- `org.daisy.pipeline.tts.aws.accesskey` (mandatory) : Access key to connect to Amazon Web Services. +- `org.daisy.pipeline.tts.aws.secretkey` (mandatory) : Secret key to connect to Amazon Web Services. +- `org.daisy.pipeline.tts.aws.region` (mandatory) : Region associated with your Amazon Web Services account. 
+- `org.daisy.pipeline.tts.aws.priority` (defaults to `15`) : priority of usage within the pipeline if other text-to-speech engines are available + +## Usage + +This adapter requires authentication keys (namely an access key and a secret key) to access the AWS Polly service. + +To get those keys, you will need to create an AWS account and an "IAM user" account, or a root user registered in an instance of an IAM identity center. To create such a user account, please refer to the [AWS IAM user creation documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html). +Note that while it is not recommended, you can also use your root account to generate the required keys, as explained in [this AWS documentation page](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_root-user_manage_add-key.html). + +From the IAM user account or your root account, you should be able to create access and secret keys by following the instructions in the [AWS IAM user documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/access-key-self-managed.html). + +You should be able to retrieve the region identifier from your associated IAM identity center instance. +As an example, for Europe (Paris), it would be displayed as `eu-west-3`. + +## Tests + +For the unit tests to work, please add the required properties mentioned above (`org.daisy.pipeline.tts.aws.{accesskey,secretkey,region}`) to either your configuration or pom, or add the options `-Dorg.daisy.pipeline.tts.aws.accesskey="your_access_key" -Dorg.daisy.pipeline.tts.aws.secretkey="your_secret_key" -Dorg.daisy.pipeline.tts.aws.region="your_region"` to your Maven call.