coze-dev · chenyuliang-star · Nov 27, 2025 · Nov 3, 2025 · Nov 27, 2025
diff --git a/common/changes/@coze/api/myang-voice-ext-interface_2025-11-26-05-40.json b/common/changes/@coze/api/myang-voice-ext-interface_2025-11-26-05-40.json
@@ -0,0 +1,11 @@
+{
+  "changes": [
+    {
+      "packageName": "@coze/api",
+      "comment": "语音播报提供释放播放器功能并支持语音识别自定义配置",
+      "type": "minor"
+    }
+  ],
+  "packageName": "@coze/api",
+  "email": "[email protected]"
+}
diff --git a/common/changes/@coze/api/myang-voice-ext-interface_2025-11-27-01-27.json b/common/changes/@coze/api/myang-voice-ext-interface_2025-11-27-01-27.json
@@ -0,0 +1,11 @@
+{
+  "changes": [
+    {
+      "packageName": "@coze/api",
+      "comment": "Clear timeout and close WebSocket before destroying player",
+      "type": "patch"
+    }
+  ],
+  "packageName": "@coze/api",
+  "email": "[email protected]"
+}
diff --git a/packages/coze-js/src/resources/websockets/types.ts b/packages/coze-js/src/resources/websockets/types.ts
@@ -193,7 +193,7 @@ export interface CommonErrorEvent extends BaseEventWithDetail {
   };
 }
 
-interface AudioConfig {
+export interface AudioConfig {
   /** Input audio format, supports pcm/wav/ogg */
   format?: 'pcm' | 'wav' | 'ogg';
   /** Input audio codec, supports pcm/opus/g711a/g711u */

diff --git a/packages/coze-js/src/ws-tools/speech/index.ts b/packages/coze-js/src/ws-tools/speech/index.ts
@@ -186,6 +186,30 @@ class WsSpeechClient {
     this.closeWs();
   }
 
+  /**
+   * Tears down the wavStreamPlayer and clears playback bookkeeping so the
+   * client instance can be disposed. Intended for mobile browsers, where creating
+   * several WsSpeechClient instances without releasing the player can break speech playback.
+   */
+  async destroyPlayer() {
+    // Cancel the playback timeout if one is still pending.
+    const pendingTimer = this.playbackTimeout;
+    if (pendingTimer) {
+      clearTimeout(pendingTimer);
+      this.playbackTimeout = null;
+    }
+
+    // Close the WebSocket before the player is destroyed.
+    this.closeWs();
+    await this.wavStreamPlayer.destroy();
+    // Player destroyed: reset the timing fields and empty audioDeltaList in place.
+    this.totalDuration = 0;
+    this.playbackStartTime = null;
+    this.playbackPauseTime = null;
+    this.elapsedBeforePause = 0;
+    this.audioDeltaList.length = 0;
+  }
+
   append(message: string) {
     this.ws?.send({
       id: uuid(),

diff --git a/packages/coze-js/src/ws-tools/transcription/index.ts b/packages/coze-js/src/ws-tools/transcription/index.ts
@@ -1,11 +1,15 @@
 import { v4 as uuid } from 'uuid';
 
-import { WebsocketsEventType } from '../..';
-import BaseWsTranscriptionClient from './base';
 import {
   type AIDenoiserProcessorLevel,
   type AIDenoiserProcessorMode,
 } from '../recorder/pcm-recorder';
+import {
+  type AudioConfig,
+  type TranscriptionsUpdateEvent,
+} from '../../resources/websockets/types';
+import { WebsocketsEventType } from '../..';
+import BaseWsTranscriptionClient from './base';
 
 class WsTranscriptionClient extends BaseWsTranscriptionClient {
   private isRecording = false;
@@ -17,18 +21,45 @@ class WsTranscriptionClient extends BaseWsTranscriptionClient {
     this.ws?.send({
       id: uuid(),
       event_type: WebsocketsEventType.TRANSCRIPTIONS_UPDATE,
-      data: {
-        input_audio: {
-          format: 'pcm',
-          codec: 'pcm',
-          sample_rate: sampleRate,
-          channel: 1,
-          bit_depth: 16,
-        },
-      },
+      data: this.getInitialUpdateData(sampleRate),
     });
   }
 
+  /**
+   * Builds the `data` payload sent with the initial TRANSCRIPTIONS_UPDATE event.
+   *
+   * Starts from the default input audio settings (pcm format and codec, one
+   * channel, 16-bit depth, the given sample rate) and layers
+   * `config.transcriptionUpdateData` on top: its top-level fields are copied
+   * as-is, while its `input_audio` fields override the defaults one by one.
+   *
+   * @param sampleRate - Sample rate placed in the default `input_audio`.
+   * @returns The merged payload; `input_audio` is always present.
+   */
+  private getInitialUpdateData(
+    sampleRate: number,
+  ): TranscriptionsUpdateEvent['data'] {
+    const overrides = this.config.transcriptionUpdateData;
+
+    const fallbackAudio: AudioConfig = {
+      format: 'pcm',
+      codec: 'pcm',
+      sample_rate: sampleRate,
+      channel: 1,
+      bit_depth: 16,
+    };
+
+    // Spreading an absent `overrides` or `overrides.input_audio` adds nothing,
+    // so the defaults survive wherever the caller supplied no value.
+    return {
+      ...overrides,
+      input_audio: {
+        ...fallbackAudio,
+        ...overrides?.input_audio,
+      },
+    };
+  }
+
   destroy() {
     this.recorder.destroy();
     this.listeners.clear();

diff --git a/packages/coze-js/src/ws-tools/types.ts b/packages/coze-js/src/ws-tools/types.ts
@@ -1,3 +1,4 @@
+import { type TranscriptionsUpdateEvent } from '../resources/websockets/types';
 import {
   type AIDenoiserProcessorLevel,
   type AIDenoiserProcessorMode,
@@ -428,6 +429,11 @@ export interface WsTranscriptionClientOptions extends WsToolsOptions {
   wavRecordConfig?: WavRecordConfig;
   entityType?: 'bot' | 'workflow';
   entityId?: string;
+  /**
+   * en: Custom data payload for initial transcription update
+   * zh: 初始语音识别更新的接口配置数据
+   */
+  transcriptionUpdateData?: TranscriptionsUpdateEvent['data'];
 }
 
 export type WsSimultInterpretationClientOptions = WsTranscriptionClientOptions;