diff --git a/common/changes/@coze/api/myang-voice-ext-interface_2025-11-26-05-40.json b/common/changes/@coze/api/myang-voice-ext-interface_2025-11-26-05-40.json new file mode 100644 index 00000000..68381b0f --- /dev/null +++ b/common/changes/@coze/api/myang-voice-ext-interface_2025-11-26-05-40.json @@ -0,0 +1,11 @@ +{ + "changes": [ + { + "packageName": "@coze/api", + "comment": "语音播报提供释放播放器功能并支持语音识别自定义配置", + "type": "minor" + } + ], + "packageName": "@coze/api", + "email": "447258925@qq.com" +} diff --git a/common/changes/@coze/api/myang-voice-ext-interface_2025-11-27-01-27.json b/common/changes/@coze/api/myang-voice-ext-interface_2025-11-27-01-27.json new file mode 100644 index 00000000..cb34c691 --- /dev/null +++ b/common/changes/@coze/api/myang-voice-ext-interface_2025-11-27-01-27.json @@ -0,0 +1,11 @@ +{ + "changes": [ + { + "packageName": "@coze/api", + "comment": "Clear timeout and close WebSocket before destroying player", + "type": "patch" + } + ], + "packageName": "@coze/api", + "email": "447258925@qq.com" +} diff --git a/packages/coze-js/src/resources/websockets/types.ts b/packages/coze-js/src/resources/websockets/types.ts index a149bb24..03b4603a 100644 --- a/packages/coze-js/src/resources/websockets/types.ts +++ b/packages/coze-js/src/resources/websockets/types.ts @@ -193,7 +193,7 @@ export interface CommonErrorEvent extends BaseEventWithDetail { }; } -interface AudioConfig { +export interface AudioConfig { /** Input audio format, supports pcm/wav/ogg */ format?: 'pcm' | 'wav' | 'ogg'; /** Input audio codec, supports pcm/opus/g711a/g711u */ diff --git a/packages/coze-js/src/ws-tools/speech/index.ts b/packages/coze-js/src/ws-tools/speech/index.ts index 8b6c83dc..a2ec17ed 100644 --- a/packages/coze-js/src/ws-tools/speech/index.ts +++ b/packages/coze-js/src/ws-tools/speech/index.ts @@ -186,6 +186,30 @@ class WsSpeechClient { this.closeWs(); } + /** + * Releases wavStreamPlayer resources so callers can dispose the instance. 
+ * In a mobile browser environment, if the WsSpeechClient is instantiated multiple times, + * you can additionally call the destroyPlayer method to release resources and prevent issues with speech playback. + */ + async destroyPlayer() { + // Cancel any pending playback timeout first so it cannot fire after teardown + if (this.playbackTimeout) { + clearTimeout(this.playbackTimeout); + this.playbackTimeout = null; + } + + // Close the WebSocket via closeWs() before touching the player + this.closeWs(); + + // Destroy the player, then reset the playback bookkeeping fields and empty audioDeltaList. NOTE(review): if destroy() rejects, the resets below are skipped; consider try/finally. + await this.wavStreamPlayer.destroy(); + this.totalDuration = 0; + this.playbackStartTime = null; + this.playbackPauseTime = null; + this.elapsedBeforePause = 0; + this.audioDeltaList.length = 0; + } + append(message: string) { this.ws?.send({ id: uuid(), diff --git a/packages/coze-js/src/ws-tools/transcription/index.ts b/packages/coze-js/src/ws-tools/transcription/index.ts index 70d71eb5..814e2ccd 100644 --- a/packages/coze-js/src/ws-tools/transcription/index.ts +++ b/packages/coze-js/src/ws-tools/transcription/index.ts @@ -1,11 +1,15 @@ import { v4 as uuid } from 'uuid'; -import { WebsocketsEventType } from '../..'; -import BaseWsTranscriptionClient from './base'; import { type AIDenoiserProcessorLevel, type AIDenoiserProcessorMode, } from '../recorder/pcm-recorder'; +import { + type AudioConfig, + type TranscriptionsUpdateEvent, +} from '../../resources/websockets/types'; +import { WebsocketsEventType } from '../..'; +import BaseWsTranscriptionClient from './base'; class WsTranscriptionClient extends BaseWsTranscriptionClient { private isRecording = false; @@ -17,18 +21,45 @@ class WsTranscriptionClient extends BaseWsTranscriptionClient { this.ws?.send({ id: uuid(), event_type: WebsocketsEventType.TRANSCRIPTIONS_UPDATE, - data: { - input_audio: { - format: 'pcm', - codec: 'pcm', - sample_rate: sampleRate, - channel: 1, - bit_depth: 16, - }, - }, + data: this.getInitialUpdateData(sampleRate), }); } + private getInitialUpdateData( + sampleRate: number, + ): TranscriptionsUpdateEvent['data'] 
{ + const defaultInputAudio: AudioConfig = { /* baseline input_audio settings; sample_rate comes from the sampleRate argument */ + format: 'pcm', + codec: 'pcm', + sample_rate: sampleRate, + channel: 1, + bit_depth: 16, + }; + + const customUpdateData = this.config.transcriptionUpdateData; /* optional caller-supplied payload */ + + if (!customUpdateData) { /* no custom payload: send only the default input_audio */ + return { + input_audio: defaultInputAudio, + }; + } + + if (!customUpdateData.input_audio) { /* custom payload without input_audio: keep its fields and add the defaults */ + return { + ...customUpdateData, + input_audio: defaultInputAudio, + }; + } + + return { + ...customUpdateData, + input_audio: { /* field-wise merge: the caller's input_audio values override the defaults */ + ...defaultInputAudio, + ...customUpdateData.input_audio, + }, + }; + } + destroy() { this.recorder.destroy(); this.listeners.clear(); diff --git a/packages/coze-js/src/ws-tools/types.ts b/packages/coze-js/src/ws-tools/types.ts index b137cae3..3333b14b 100644 --- a/packages/coze-js/src/ws-tools/types.ts +++ b/packages/coze-js/src/ws-tools/types.ts @@ -1,3 +1,4 @@ +import { type TranscriptionsUpdateEvent } from '../resources/websockets/types'; import { type AIDenoiserProcessorLevel, type AIDenoiserProcessorMode, @@ -428,6 +429,11 @@ export interface WsTranscriptionClientOptions extends WsToolsOptions { wavRecordConfig?: WavRecordConfig; entityType?: 'bot' | 'workflow'; entityId?: string; + /** + * en: Custom data payload for the initial transcription update; WsTranscriptionClient merges its input_audio over the default pcm settings + * zh: 初始语音识别更新的接口配置数据 + */ + transcriptionUpdateData?: TranscriptionsUpdateEvent['data']; } export type WsSimultInterpretationClientOptions = WsTranscriptionClientOptions;