44import typing
55from enum import Enum
66
7+ import httpx
8+
79try :
810 import websockets
911except ImportError :
@@ -177,8 +179,11 @@ async def _connect_audio(self, options: RealtimeAudioOptions) -> RealtimeConnect
177179 if not audio_format or not sample_rate :
178180 raise ValueError ("audio_format and sample_rate are required for manual audio mode" )
179181
182+ token = await self .get_token ()
183+
180184 # Build WebSocket URL with query parameters
181185 ws_url = self ._build_websocket_url (
186+ token = token ,
182187 model_id = model_id ,
183188 encoding = audio_format .value ,
184189 sample_rate = sample_rate ,
@@ -191,10 +196,7 @@ async def _connect_audio(self, options: RealtimeAudioOptions) -> RealtimeConnect
191196 )
192197
193198 # Connect to WebSocket
194- websocket = await websockets .connect (
195- ws_url ,
196- additional_headers = {"xi-api-key" : self .api_key }
197- )
199+ websocket = await websockets .connect (ws_url )
198200
199201 # Create connection object
200202 connection = RealtimeConnection (
@@ -223,12 +225,15 @@ async def _connect_url(self, options: RealtimeUrlOptions) -> RealtimeConnection:
223225 if not url :
224226 raise ValueError ("url is required for URL mode" )
225227
228+ token = await self .get_token ()
229+
226230 # Default to 16kHz for URL streaming
227231 sample_rate = 16000
228232 encoding = "pcm_16000"
229233
230234 # Build WebSocket URL
231235 ws_url = self ._build_websocket_url (
236+ token = token ,
232237 model_id = model_id ,
233238 encoding = encoding ,
234239 sample_rate = sample_rate ,
@@ -332,6 +337,7 @@ async def _stream_ffmpeg_to_websocket(self, connection: RealtimeConnection) -> N
332337
333338 def _build_websocket_url (
334339 self ,
340+ token : str ,
335341 model_id : str ,
336342 encoding : str ,
337343 sample_rate : int ,
@@ -348,6 +354,7 @@ def _build_websocket_url(
348354
349355 # Build query parameters
350356 params = [
357+ f"token={ token } " ,
351358 f"model_id={ model_id } " ,
352359 f"encoding={ encoding } " ,
353360 f"sample_rate={ sample_rate } " ,
@@ -369,3 +376,29 @@ def _build_websocket_url(
369376 query_string = "&" .join (params )
370377 return f"{ base } /v1/speech-to-text/realtime-beta?{ query_string } "
371378
async def get_token(self) -> str:
    """
    Fetch a single-use token for realtime scribe WebSocket connection.

    The token endpoint is requested over HTTP(S): a ``wss://`` / ``ws://``
    scheme in ``self.base_url`` is rewritten to ``https://`` / ``http://``
    and the request is authenticated with the ``xi-api-key`` header.

    Returns:
        str: The single-use token

    Raises:
        RuntimeError: If the request cannot be sent, the server answers
            with a non-success status, or the response body does not
            contain a ``token`` field.
    """
    # Phase 1: transport. Any failure to build the URL or send the request
    # is reported as a RuntimeError, chained to the underlying cause.
    try:
        # Convert WebSocket URL to HTTP URL for the token request
        http_base_url = self.base_url.replace("wss://", "https://").replace("ws://", "http://")

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{http_base_url}/v1/single-use-token/realtime_scribe",
                headers={"xi-api-key": self.api_key},
            )
    except Exception as e:
        raise RuntimeError(f"Failed to get token: {e}") from e

    # Phase 2: HTTP status. Raised outside any try block so the message is
    # not wrapped a second time ("Failed to get token: Failed to get ...").
    if not response.is_success:
        raise RuntimeError(f"Failed to get token: {response.status_code} {response.reason_phrase}")

    # Phase 3: payload. A non-JSON body or a missing/ill-shaped "token"
    # field gets an explicit message instead of a bare KeyError text.
    try:
        return response.json()["token"]
    except (ValueError, KeyError, TypeError) as e:
        raise RuntimeError(f"Failed to get token: malformed token response ({e!r})") from e
0 commit comments