Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions client-js/client/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,16 @@ import {
LLMContextMessage,
LLMFunctionCallData,
LLMFunctionCallResult,
MimeTypeMapping,
Participant,
PipecatMetricsData,
RTVIEvent,
RTVIEvents,
RTVIFile,
RTVIFileFormat,
RTVIMessage,
RTVIMessageType,
SendFileOptions,
SendTextOptions,
setAboutClient,
TranscriptData,
Expand Down Expand Up @@ -688,6 +692,178 @@ export class PipecatClient extends RTVIEventEmitter {
);
}

/**
 * Send a file, together with accompanying text, to the bot as a
 * `send-file` message.
 *
 * Delivery strategy:
 * - A browser `File` whose estimated encoded size fits within
 *   `this._transport.maxMessageSize` is read as a data URL and sent inline
 *   as a `bytes` source.
 * - A browser `File` that is too large is uploaded via `uploadFile()` and the
 *   RTVIFile returned by the upload is sent instead.
 * - An `RTVIFile` with a `bytes` source that is too large is decoded,
 *   uploaded via `uploadFile()`, and replaced by the upload result.
 * - Any other `RTVIFile` is sent as provided (with its `format` normalized).
 *
 * Shorthand formats listed in `MimeTypeMapping` (e.g. "png") are translated
 * to their MIME type. The caller's `RTVIFile` object is not modified.
 *
 * @param file - A browser `File` or an `RTVIFile` description.
 * @param content - Text sent alongside the file in the message payload.
 * @param options - Forwarded unchanged in the message payload.
 * @throws RTVIErrors.RTVIError if the browser cannot read the file data.
 */
@transportReady
public async sendFile(
  file: RTVIFile | File,
  content: string,
  options: SendFileOptions = {}
): Promise<void> {
  // Shallow-copy a caller-supplied RTVIFile so that normalizing `format`
  // below never mutates the caller's object.
  let rtviFile: RTVIFile =
    file instanceof File ? ({} as RTVIFile) : { ...file };
  let mimeType: string =
    file instanceof File ? file.type : rtviFile.format.toLowerCase();
  // Own-property check: a bare `in` would also match inherited keys such as
  // "constructor" or "toString".
  if (Object.prototype.hasOwnProperty.call(MimeTypeMapping, mimeType)) {
    mimeType = MimeTypeMapping[mimeType as RTVIFileFormat];
  }
  rtviFile.format = mimeType;

  // Reads `rtviFile` at call time, so the reassignments below are picked up.
  const sendFileMessage = async () => {
    await this._sendMessage(
      new RTVIMessage(RTVIMessageType.SEND_FILE, {
        file: rtviFile,
        content,
        options,
      })
    );
  };

  let uploadFile: File | undefined;
  if (file instanceof File) {
    // Estimate the message size with base64 encoding overhead (~33% larger)
    // Add buffer for message wrapper overhead. This saves us from having to
    // unnecessarily read the file into memory and encode it to base64.
    const estimatedEncodedSize = Math.ceil(file.size * 1.37) + 1000;

    if (estimatedEncodedSize > this._transport.maxMessageSize) {
      uploadFile = file;
    } else {
      // Wrap only the callback-based FileReader in a promise, and reject on
      // every failure path so the caller never waits on a promise that can
      // no longer settle.
      const dataUrl = await new Promise<string>((resolve, reject) => {
        const reader = new FileReader();
        const fail = () =>
          reject(new RTVIErrors.RTVIError("Could not read file data"));
        reader.onload = () => {
          const { result } = reader;
          if (typeof result === "string" && result) {
            resolve(result);
          } else {
            fail();
          }
        };
        reader.onerror = fail;
        reader.onabort = fail;
        reader.readAsDataURL(file);
      });

      rtviFile = {
        name: file.name,
        format: mimeType,
        source: {
          type: "bytes",
          bytes: dataUrl,
        },
      };
      // Errors from sending now propagate to the caller through `await`.
      await sendFileMessage();
      return;
    }
  } else if (rtviFile.source.type === "bytes") {
    const encoded = rtviFile.source.bytes;
    const estimatedSize = encoded.length + 1000;
    if (estimatedSize > this._transport.maxMessageSize) {
      // Convert bytes to File and upload. Accepts either a data URL (payload
      // after the first comma) or a bare base64 string.
      const byteString = atob(encoded.split(",")[1] || encoded);
      const ab = new ArrayBuffer(byteString.length);
      const ia = new Uint8Array(ab);
      for (let i = 0; i < byteString.length; i++) {
        ia[i] = byteString.charCodeAt(i);
      }
      uploadFile = new File([ab], rtviFile.name || "uploaded_file", {
        type: mimeType,
      });
    }
  }

  if (uploadFile) {
    // File is too large for transport, upload it first
    rtviFile = await this.uploadFile(uploadFile);
  }

  await sendFileMessage();
}

/**
 * Upload a file to a specified endpoint or the default files endpoint.
 *
 * The file is sent as a multipart `POST` with the file under the form field
 * `file`. Any `Content-Type` header carried over from the supplied or
 * inherited headers is dropped so the runtime can generate the multipart
 * boundary itself.
 *
 * @param file - The File to upload
 * @param uploadFileParams - Optional APIRequest. If not provided, constructs
 * endpoint from startBotParams.endpoint by replacing the path with /files
 * @returns Promise resolving to RTVIFile with name, format, and FileUrl source
 * @throws RTVIErrors.RTVIError if no endpoint is available or a URL cannot be
 * determined from the endpoint value.
 */
public async uploadFile(
  file: File,
  uploadFileParams?: APIRequest
): Promise<RTVIFile> {
  let uploadUrl: string;
  let headers: Headers | undefined;
  let timeout: number | undefined;

  if (uploadFileParams) {
    const { endpoint } = uploadFileParams;
    // NOTE(review): when `endpoint` is a Request, only
    // `uploadFileParams.headers` is used here; the Request's own headers are
    // not read (unlike the startBotParams branch below) — confirm intent.
    headers = uploadFileParams.headers;
    timeout = uploadFileParams.timeout;

    if (endpoint instanceof URL) {
      uploadUrl = endpoint.toString();
    } else if (typeof endpoint === "string") {
      uploadUrl = endpoint;
    } else if (
      typeof Request !== "undefined" &&
      endpoint instanceof Request
    ) {
      uploadUrl = endpoint.url;
    } else {
      throw new RTVIErrors.RTVIError(
        "Unable to determine URL from uploadFileParams.endpoint"
      );
    }
  } else {
    // Construct from startBotParams
    const startBotParams = this._transport.startBotParams;
    if (!startBotParams?.endpoint) {
      throw new RTVIErrors.RTVIError(
        "No uploadFileParams provided and no startBotParams.endpoint available"
      );
    }

    timeout = startBotParams.timeout;

    let baseUrl: URL;
    if (startBotParams.endpoint instanceof URL) {
      baseUrl = startBotParams.endpoint;
      headers = startBotParams.headers;
    } else if (typeof startBotParams.endpoint === "string") {
      baseUrl = new URL(startBotParams.endpoint);
      headers = startBotParams.headers;
    } else if (
      typeof Request !== "undefined" &&
      startBotParams.endpoint instanceof Request
    ) {
      baseUrl = new URL(startBotParams.endpoint.url);
      headers = new Headers(startBotParams.endpoint.headers);
    } else {
      throw new RTVIErrors.RTVIError(
        "Unable to determine base URL from startBotParams.endpoint"
      );
    }

    // Change the path to /files
    uploadUrl = `${baseUrl.origin}/files`;
  }

  // Create FormData with the file
  const formData = new FormData();
  formData.append("file", file);

  // Copy the headers (never mutate the caller's / transport's Headers) and
  // drop any Content-Type: for a FormData body the runtime must set
  // `multipart/form-data` with its own generated boundary, and an inherited
  // value (e.g. application/json) would override it.
  const requestHeaders = new Headers(headers);
  requestHeaders.delete("Content-Type");

  // Create the Request object
  const request = new Request(uploadUrl, {
    method: "POST",
    mode: "cors",
    body: formData,
    headers: requestHeaders,
  });

  const response = await makeRequest(
    { endpoint: request, timeout },
    this._abortController
  );
  // NOTE(review): the response shape is trusted without validation.
  return response as RTVIFile;
}

/**
* Disconnects the bot, but keeps the session alive
*/
Expand Down
108 changes: 108 additions & 0 deletions client-js/rtvi/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export enum RTVIMessageType {
// Client-to-server messages
CLIENT_MESSAGE = "client-message",
SEND_TEXT = "send-text",
SEND_FILE = "send-file",
// DEPRECATED
APPEND_TO_CONTEXT = "append-to-context",

Expand Down Expand Up @@ -187,6 +188,113 @@ export type SendTextOptions = {
audio_response?: boolean;
};

/**
 * A JSON-like value: a string, number, boolean or null, an array of such
 * values, or an object keyed by string/number whose values are themselves
 * Serializable. Used as the value type of `SendFileOptions.custom_options`.
 */
type Serializable =
| string
| number
| boolean
| null
| Serializable[]
| { [key: number | string]: Serializable };

/** Shorthand image formats that map to a MIME type in `MimeTypeMapping`. */
export type RTVIImageFormat =
  | "png"
  | "jpg"
  | "jpeg"
  | "webp"
  | "gif"
  | "heic"
  | "heif"
  // Misspelling of "heif"; kept as an alias so existing callers still compile.
  | "hief";

/** Shorthand document/text formats that map to a MIME type in `MimeTypeMapping`. */
export type RTVIDocFormat =
  | "pdf"
  | "csv"
  | "txt"
  | "md"
  | "doc"
  | "docx"
  | "xls"
  | "xlsx"
  | "json"
  | "html"
  | "css"
  | "javascript";

/** Shorthand audio/video formats that map to a MIME type in `MimeTypeMapping`. */
export type RTVIMediaFormat =
  | "mp3"
  | "wav"
  | "ogg"
  | "aac"
  | "mp4"
  | "webm"
  | "avi";

/** Every shorthand format name that has an entry in `MimeTypeMapping`. */
export type RTVIFileFormat = RTVIImageFormat | RTVIDocFormat | RTVIMediaFormat;

/**
 * Maps each shorthand format name to its MIME type. The `Record` annotation
 * makes the compiler require exactly one entry per `RTVIFileFormat` member.
 */
export const MimeTypeMapping: Record<RTVIFileFormat, string> = {
  // Images
  png: "image/png",
  jpg: "image/jpeg",
  jpeg: "image/jpeg",
  webp: "image/webp",
  gif: "image/gif",
  heic: "image/heic",
  heif: "image/heif",
  // Backward-compatible alias for the misspelled shorthand.
  hief: "image/heif",
  // Documents
  pdf: "application/pdf",
  csv: "text/csv",
  txt: "text/plain",
  md: "text/markdown",
  doc: "application/msword",
  docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  xls: "application/vnd.ms-excel",
  xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  json: "application/json",
  html: "text/html",
  css: "text/css",
  javascript: "application/javascript",
  // Media
  mp3: "audio/mpeg",
  wav: "audio/wav",
  ogg: "audio/ogg",
  aac: "audio/aac",
  mp4: "video/mp4",
  webm: "video/webm",
  avi: "video/x-msvideo",
};

/**
 * Discriminant for how a file's data is supplied: inline as `bytes`, or by
 * reference as a `url`. See `FileBytes` and `FileUrl`.
 */
export type FileSourceType = "bytes" | "url";

/** File source whose data is carried inline in the message. */
export type FileBytes = {
// Discriminant; always "bytes" for this variant.
type: Extract<FileSourceType, "bytes">;
// Encoded file data as a string.
// NOTE(review): looks like this is base64, optionally prefixed as a data
// URL ("data:<mime>;base64,<payload>") — confirm against the server contract.
bytes: string;
// Presumably image dimensions in pixels — TODO confirm.
width?: number;
height?: number;
};
/** File source that references the data by URL instead of carrying it inline. */
export type FileUrl = {
// Discriminant; always "url" for this variant.
type: Extract<FileSourceType, "url">;
// Location of the file, as a string or a URL object.
url: string | URL;
};

/**
 * Description of a file: an optional name, its format, and where its data
 * comes from (inline bytes or a URL).
 */
export type RTVIFile = {
// Optional file name.
name?: string;
// RTVI definition takes the Mime type here, but in client-js, we support
// clients providing shorthands defined above and we map them to Mime types
format: string;
// Discriminated on `source.type` ("bytes" | "url").
source: FileBytes | FileUrl;
};

/**
 * Optional settings sent alongside a file. All fields are optional.
 * NOTE(review): the meaning of `run_immediately` and `audio_response` is
 * defined by the receiving side and is not visible here — confirm before
 * documenting further.
 */
export type SendFileOptions = {
run_immediately?: boolean;
audio_response?: boolean;
// for things like 'detail' in openAI or 'citations' in Bedrock
custom_options?: { [key: number | string]: Serializable };
};


/**
 * Describes file-handling capabilities: which formats and source types are
 * accepted and the maximum size.
 * NOTE(review): who produces and consumes this type is not visible here —
 * presumably advertised by the server or transport; confirm.
 */
export type FileSupport = {
// Accepted format strings.
formats: string[];
// Accepted ways of supplying file data ("bytes" and/or "url").
sources: FileSourceType[];
maxSize: number; // bytes
};

/** DEPRECATED */
export type LLMContextMessage = {
role: "user" | "assistant";
Expand Down