Skip to content

Commit a589a30

Browse files
committed
feat: introduce Inworld WebSocket TTS service supporting multiple audio contexts, along with an example.
1 parent ab58f72 commit a589a30

File tree

2 files changed

+467
-2
lines changed

2 files changed

+467
-2
lines changed
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
#
2+
# Copyright (c) 2024–2025, Daily
3+
#
4+
# SPDX-License-Identifier: BSD 2-Clause License
5+
#
6+
7+
import os
8+
9+
from dotenv import load_dotenv
10+
from loguru import logger
11+
12+
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
13+
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
14+
from pipecat.audio.vad.silero import SileroVADAnalyzer
15+
from pipecat.audio.vad.vad_analyzer import VADParams
16+
from pipecat.frames.frames import LLMRunFrame
17+
from pipecat.pipeline.pipeline import Pipeline
18+
from pipecat.pipeline.runner import PipelineRunner
19+
from pipecat.pipeline.task import PipelineParams, PipelineTask
20+
from pipecat.processors.aggregators.llm_context import LLMContext
21+
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
22+
from pipecat.runner.types import RunnerArguments
23+
from pipecat.runner.utils import create_transport
24+
from pipecat.services.deepgram.stt import DeepgramSTTService
25+
from pipecat.services.inworld.tts import InworldWebsocketTTSService
26+
from pipecat.services.openai.llm import OpenAILLMService
27+
from pipecat.transports.base_transport import BaseTransport, TransportParams
28+
from pipecat.transports.daily.transport import DailyParams
29+
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
30+
31+
load_dotenv(override=True)
32+
33+
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
34+
# instantiated. The function will be called when the desired transport gets
35+
# selected.
36+
transport_params = {
37+
"daily": lambda: DailyParams(
38+
audio_in_enabled=True,
39+
audio_out_enabled=True,
40+
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
41+
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
42+
),
43+
"twilio": lambda: FastAPIWebsocketParams(
44+
audio_in_enabled=True,
45+
audio_out_enabled=True,
46+
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
47+
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
48+
),
49+
"webrtc": lambda: TransportParams(
50+
audio_in_enabled=True,
51+
audio_out_enabled=True,
52+
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
53+
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
54+
),
55+
}
56+
57+
58+
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
59+
logger.info(f"Starting bot")
60+
61+
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
62+
63+
# Inworld WebSocket TTS Service with multi-context streaming support
64+
tts = InworldWebsocketTTSService(
65+
api_key=os.getenv("INWORLD_API_KEY", ""),
66+
voice_id="Ashley",
67+
model="inworld-tts-1",
68+
# Params are optional: you can pass speaking_rate, temperature, etc.
69+
)
70+
71+
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
72+
73+
messages = [
74+
{
75+
"role": "system",
76+
"content": "You are very knowledgable about dogs. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
77+
},
78+
]
79+
80+
context = LLMContext(messages)
81+
context_aggregator = LLMContextAggregatorPair(context)
82+
83+
pipeline = Pipeline(
84+
[
85+
transport.input(), # Transport user input
86+
stt, # STT
87+
context_aggregator.user(), # User responses
88+
llm, # LLM
89+
tts, # TTS
90+
transport.output(), # Transport bot output
91+
context_aggregator.assistant(), # Assistant spoken responses
92+
]
93+
)
94+
95+
task = PipelineTask(
96+
pipeline,
97+
params=PipelineParams(
98+
enable_metrics=True,
99+
enable_usage_metrics=True,
100+
),
101+
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
102+
)
103+
104+
@transport.event_handler("on_client_connected")
105+
async def on_client_connected(transport, client):
106+
logger.info(f"Client connected")
107+
# Kick off the conversation.
108+
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
109+
await task.queue_frames([LLMRunFrame()])
110+
111+
@transport.event_handler("on_client_disconnected")
112+
async def on_client_disconnected(transport, client):
113+
logger.info(f"Client disconnected")
114+
await task.cancel()
115+
116+
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
117+
118+
await runner.run(task)
119+
120+
121+
async def bot(runner_args: RunnerArguments):
122+
"""Main bot entry point compatible with Pipecat Cloud."""
123+
transport = await create_transport(runner_args, transport_params)
124+
await run_bot(transport, runner_args)
125+
126+
127+
if __name__ == "__main__":
128+
from pipecat.runner.run import main
129+
130+
main()

0 commit comments

Comments
 (0)