Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"changes": [
{
"packageName": "@coze/api",
"comment": "语音播报提供释放播放器功能并支持语音识别自定义配置",
"type": "minor"
}
],
"packageName": "@coze/api",
"email": "[email protected]"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"changes": [
{
"packageName": "@coze/api",
"comment": "Clear timeout and close WebSocket before destroying player",
"type": "patch"
}
],
"packageName": "@coze/api",
"email": "[email protected]"
}
2 changes: 1 addition & 1 deletion packages/coze-js/src/resources/websockets/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ export interface CommonErrorEvent extends BaseEventWithDetail {
};
}

interface AudioConfig {
export interface AudioConfig {
/** Input audio format, supports pcm/wav/ogg */
format?: 'pcm' | 'wav' | 'ogg';
/** Input audio codec, supports pcm/opus/g711a/g711u */
Expand Down
24 changes: 24 additions & 0 deletions packages/coze-js/src/ws-tools/speech/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,30 @@ class WsSpeechClient {
this.closeWs();
}

/**
 * Releases wavStreamPlayer resources so callers can dispose the instance.
 *
 * In a mobile browser environment, if the WsSpeechClient is instantiated
 * multiple times, you can additionally call this method to release resources
 * and prevent issues with speech playback.
 *
 * Teardown order: cancel the pending playback timeout, close the WebSocket,
 * destroy the player, then reset the playback bookkeeping fields.
 *
 * @returns A promise that settles after `wavStreamPlayer.destroy()` settles.
 *   If that call rejects, the rejection is propagated to the caller, but the
 *   playback bookkeeping is still reset first so the instance is not left
 *   holding stale timing data.
 */
async destroyPlayer(): Promise<void> {
  // Clear any pending timeout first so its callback cannot fire mid-teardown.
  if (this.playbackTimeout) {
    clearTimeout(this.playbackTimeout);
    this.playbackTimeout = null;
  }

  // Ensure WebSocket is closed before the player goes away.
  this.closeWs();

  try {
    // Now safe to destroy the player.
    await this.wavStreamPlayer.destroy();
  } finally {
    // Reset playback state even when destroy() rejects: the timeout and the
    // socket are already gone at this point, so keeping the old values would
    // leave the client in a half-torn-down, inconsistent state.
    this.totalDuration = 0;
    this.playbackStartTime = null;
    this.playbackPauseTime = null;
    this.elapsedBeforePause = 0;
    // Truncate in place so existing references to the list see it emptied.
    this.audioDeltaList.length = 0;
  }
}

append(message: string) {
this.ws?.send({
id: uuid(),
Expand Down
53 changes: 42 additions & 11 deletions packages/coze-js/src/ws-tools/transcription/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import { v4 as uuid } from 'uuid';

import { WebsocketsEventType } from '../..';
import BaseWsTranscriptionClient from './base';
import {
type AIDenoiserProcessorLevel,
type AIDenoiserProcessorMode,
} from '../recorder/pcm-recorder';
import {
type AudioConfig,
type TranscriptionsUpdateEvent,
} from '../../resources/websockets/types';
import { WebsocketsEventType } from '../..';
import BaseWsTranscriptionClient from './base';

class WsTranscriptionClient extends BaseWsTranscriptionClient {
private isRecording = false;
Expand All @@ -17,18 +21,45 @@ class WsTranscriptionClient extends BaseWsTranscriptionClient {
this.ws?.send({
id: uuid(),
event_type: WebsocketsEventType.TRANSCRIPTIONS_UPDATE,
data: {
input_audio: {
format: 'pcm',
codec: 'pcm',
sample_rate: sampleRate,
channel: 1,
bit_depth: 16,
},
},
data: this.getInitialUpdateData(sampleRate),
});
}

/**
 * Builds the `data` payload sent with the initial transcriptions-update event.
 *
 * The payload starts from `this.config.transcriptionUpdateData` (when set) and
 * always carries an `input_audio` section: the built-in PCM defaults, with any
 * caller-supplied `input_audio` fields layered on top.
 *
 * @param sampleRate Sample rate used for the default `input_audio.sample_rate`.
 * @returns The merged update payload.
 */
private getInitialUpdateData(
  sampleRate: number,
): TranscriptionsUpdateEvent['data'] {
  const overrides = this.config.transcriptionUpdateData;

  // Baseline audio description used for every field the caller leaves out.
  const baseInputAudio: AudioConfig = {
    format: 'pcm',
    codec: 'pcm',
    sample_rate: sampleRate,
    channel: 1,
    bit_depth: 16,
  };

  // Spreading an absent value contributes no keys, so a single merge covers
  // "no custom data", "custom data without input_audio", and "custom data
  // with input_audio" alike.
  return {
    ...overrides,
    input_audio: {
      ...baseInputAudio,
      ...overrides?.input_audio,
    },
  };
}

destroy() {
this.recorder.destroy();
this.listeners.clear();
Expand Down
6 changes: 6 additions & 0 deletions packages/coze-js/src/ws-tools/types.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { type TranscriptionsUpdateEvent } from '../resources/websockets/types';
import {
type AIDenoiserProcessorLevel,
type AIDenoiserProcessorMode,
Expand Down Expand Up @@ -428,6 +429,11 @@ export interface WsTranscriptionClientOptions extends WsToolsOptions {
wavRecordConfig?: WavRecordConfig;
entityType?: 'bot' | 'workflow';
entityId?: string;
/**
* en: Custom data payload for initial transcription update
* zh: 初始语音识别更新的接口配置数据
*/
transcriptionUpdateData?: TranscriptionsUpdateEvent['data'];
}

export type WsSimultInterpretationClientOptions = WsTranscriptionClientOptions;