diff --git a/client-js/client/client.ts b/client-js/client/client.ts
index 87398b3..2dded18 100644
--- a/client-js/client/client.ts
+++ b/client-js/client/client.ts
@@ -19,12 +19,16 @@ import {
   LLMContextMessage,
   LLMFunctionCallData,
   LLMFunctionCallResult,
+  MimeTypeMapping,
   Participant,
   PipecatMetricsData,
   RTVIEvent,
   RTVIEvents,
+  RTVIFile,
+  RTVIFileFormat,
   RTVIMessage,
   RTVIMessageType,
+  SendFileOptions,
   SendTextOptions,
   setAboutClient,
   TranscriptData,
@@ -688,6 +692,205 @@ export class PipecatClient extends RTVIEventEmitter {
     );
   }
 
+  /**
+   * Send a file, together with accompanying text content, to the bot.
+   *
+   * A browser File small enough for the transport is read and sent inline
+   * as a base64 data URL. A File or inline "bytes" payload estimated to
+   * exceed the transport's maxMessageSize is uploaded with uploadFile()
+   * first, and the RTVIFile returned by the upload is sent instead.
+   *
+   * @param file - A browser File, or an RTVIFile with a bytes or url source
+   * @param content - Text content sent alongside the file
+   * @param options - Optional settings forwarded in the send-file message
+   * @throws RTVIError if the File cannot be read
+   */
+  @transportReady
+  public async sendFile(
+    file: RTVIFile | File,
+    content: string,
+    options: SendFileOptions = {}
+  ): Promise<void> {
+    // Resolve shorthand formats (e.g. "png") to MIME types. An own-property
+    // check is used because `in` would also match inherited keys such as
+    // "constructor".
+    let mimeType =
+      file instanceof File ? file.type : file.format.toLowerCase();
+    if (Object.prototype.hasOwnProperty.call(MimeTypeMapping, mimeType)) {
+      mimeType = MimeTypeMapping[mimeType as RTVIFileFormat];
+    }
+
+    const maxMessageSize = this._transport.maxMessageSize;
+    let rtviFile: RTVIFile;
+
+    if (file instanceof File) {
+      // Estimate the message size with base64 encoding overhead (~33%, padded
+      // to 37%) plus a buffer for the message wrapper. This saves us from
+      // reading a file into memory only to find it is too large to send.
+      const estimatedEncodedSize = Math.ceil(file.size * 1.37) + 1000;
+      if (estimatedEncodedSize > maxMessageSize) {
+        // File is too large for transport, upload it first
+        rtviFile = await this.uploadFile(file);
+      } else {
+        rtviFile = {
+          name: file.name,
+          format: mimeType,
+          source: { type: "bytes", bytes: await this._readAsDataUrl(file) },
+        };
+      }
+    } else if (
+      file.source.type === "bytes" &&
+      file.source.bytes.length + 1000 > maxMessageSize
+    ) {
+      // Inline payload is too large for transport, upload it first
+      rtviFile = await this.uploadFile(
+        this._bytesToFile(file.source.bytes, mimeType, file.name)
+      );
+    } else {
+      // Copy rather than assign so the caller's RTVIFile is never mutated
+      rtviFile = { ...file, format: mimeType };
+    }
+
+    await this._sendMessage(
+      new RTVIMessage(RTVIMessageType.SEND_FILE, {
+        file: rtviFile,
+        content,
+        options,
+      })
+    );
+  }
+
+  /**
+   * Read a browser File into a base64 data URL.
+   * @param file - The File to read
+   * @returns Promise resolving to the data URL string
+   * @throws RTVIError (as a rejection) if the read fails or yields no data
+   */
+  private _readAsDataUrl(file: File): Promise<string> {
+    return new Promise<string>((resolve, reject) => {
+      const reader = new FileReader();
+      const fail = () =>
+        reject(new RTVIErrors.RTVIError("Could not read file data"));
+      reader.onload = () => {
+        if (typeof reader.result === "string" && reader.result) {
+          resolve(reader.result);
+        } else {
+          fail();
+        }
+      };
+      // Without these handlers a failed read would leave the promise pending
+      reader.onerror = fail;
+      reader.onabort = fail;
+      reader.readAsDataURL(file);
+    });
+  }
+
+  /**
+   * Decode base64 content (optionally wrapped in a data URL) into a File.
+   * @param bytes - Base64 string, or a data URL whose payload is base64
+   * @param mimeType - MIME type to assign to the File
+   * @param name - File name; defaults to "uploaded_file"
+   */
+  private _bytesToFile(bytes: string, mimeType: string, name?: string): File {
+    // Strip a "data:...;base64," prefix when present
+    const byteString = atob(bytes.split(",")[1] || bytes);
+    const data = Uint8Array.from(byteString, (ch) => ch.charCodeAt(0));
+    return new File([data], name || "uploaded_file", { type: mimeType });
+  }
+
+  /**
+   * Upload a file to a specified endpoint or the default files endpoint.
+   * @param file - The File to upload
+   * @param uploadFileParams - Optional APIRequest. If not provided, constructs
+   * endpoint from startBotParams.endpoint by replacing the path with /files
+   * @returns Promise resolving to RTVIFile with name, format, and FileUrl source
+   * @throws RTVIError if no upload URL can be determined
+   */
+  public async uploadFile(
+    file: File,
+    uploadFileParams?: APIRequest
+  ): Promise<RTVIFile> {
+    let uploadUrl: string;
+    let headers: Headers | undefined;
+    let timeout: number | undefined;
+
+    if (uploadFileParams) {
+      const { endpoint } = uploadFileParams;
+      headers = uploadFileParams.headers;
+      timeout = uploadFileParams.timeout;
+
+      if (endpoint instanceof URL) {
+        uploadUrl = endpoint.toString();
+      } else if (typeof endpoint === "string") {
+        uploadUrl = endpoint;
+      } else if (
+        typeof Request !== "undefined" &&
+        endpoint instanceof Request
+      ) {
+        uploadUrl = endpoint.url;
+      } else {
+        throw new RTVIErrors.RTVIError(
+          "Unable to determine URL from uploadFileParams.endpoint"
+        );
+      }
+    } else {
+      // Construct from startBotParams
+      const startBotParams = this._transport.startBotParams;
+      if (!startBotParams?.endpoint) {
+        throw new RTVIErrors.RTVIError(
+          "No uploadFileParams provided and no startBotParams.endpoint available"
+        );
+      }
+
+      timeout = startBotParams.timeout;
+
+      let baseUrl: URL;
+      if (startBotParams.endpoint instanceof URL) {
+        baseUrl = startBotParams.endpoint;
+        headers = startBotParams.headers;
+      } else if (typeof startBotParams.endpoint === "string") {
+        baseUrl = new URL(startBotParams.endpoint);
+        headers = startBotParams.headers;
+      } else if (
+        typeof Request !== "undefined" &&
+        startBotParams.endpoint instanceof Request
+      ) {
+        baseUrl = new URL(startBotParams.endpoint.url);
+        headers = new Headers(startBotParams.endpoint.headers);
+      } else {
+        throw new RTVIErrors.RTVIError(
+          "Unable to determine base URL from startBotParams.endpoint"
+        );
+      }
+
+      // Change the path to /files
+      uploadUrl = `${baseUrl.origin}/files`;
+    }
+
+    // Copy the headers and drop any Content-Type: the body is multipart
+    // FormData, and the boundary parameter is only generated when the
+    // runtime sets the header itself.
+    const uploadHeaders = new Headers(headers);
+    uploadHeaders.delete("Content-Type");
+
+    // Create FormData with the file
+    const formData = new FormData();
+    formData.append("file", file);
+
+    const request = new Request(uploadUrl, {
+      method: "POST",
+      mode: "cors",
+      body: formData,
+      headers: uploadHeaders,
+    });
+
+    const response = await makeRequest(
+      { endpoint: request, timeout },
+      this._abortController
+    );
+    return response as RTVIFile;
+  }
+
   /**
    * Disconnects the bot, but keeps the session alive
    */
diff --git a/client-js/rtvi/messages.ts b/client-js/rtvi/messages.ts
index 1a9f636..94283c0 100644
--- a/client-js/rtvi/messages.ts
+++ b/client-js/rtvi/messages.ts
@@ -25,6 +25,7 @@ export enum RTVIMessageType {
   // Client-to-server messages
   CLIENT_MESSAGE = "client-message",
   SEND_TEXT = "send-text",
+  SEND_FILE = "send-file",
 
   // DEPRECATED
   APPEND_TO_CONTEXT = "append-to-context",
@@ -187,6 +188,115 @@ export type SendTextOptions = {
   audio_response?: boolean;
 };
 
+type Serializable =
+  | string
+  | number
+  | boolean
+  | null
+  | Serializable[]
+  | { [key: number | string]: Serializable };
+
+export type RTVIImageFormat =
+  | "png"
+  | "jpg"
+  | "jpeg"
+  | "webp"
+  | "gif"
+  | "heic"
+  | "heif"
+  // Misspelling of "heif"; still accepted as an alias
+  | "hief";
+export type RTVIDocFormat =
+  | "pdf"
+  | "csv"
+  | "txt"
+  | "md"
+  | "doc"
+  | "docx"
+  | "xls"
+  | "xlsx"
+  | "json"
+  | "html"
+  | "css"
+  | "javascript";
+export type RTVIMediaFormat =
+  | "mp3"
+  | "wav"
+  | "ogg"
+  | "aac"
+  | "mp4"
+  | "webm"
+  | "avi";
+export type RTVIFileFormat = RTVIImageFormat | RTVIDocFormat | RTVIMediaFormat;
+
+/** Maps the shorthand formats accepted by client-js to MIME types. */
+export const MimeTypeMapping: Record<RTVIFileFormat, string> = {
+  // Images
+  png: "image/png",
+  jpg: "image/jpeg",
+  jpeg: "image/jpeg",
+  webp: "image/webp",
+  gif: "image/gif",
+  heic: "image/heic",
+  heif: "image/heif",
+  hief: "image/heif",
+  // Documents
+  pdf: "application/pdf",
+  csv: "text/csv",
+  txt: "text/plain",
+  md: "text/markdown",
+  doc: "application/msword",
+  docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+  xls: "application/vnd.ms-excel",
+  xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+  json: "application/json",
+  html: "text/html",
+  css: "text/css",
+  javascript: "application/javascript",
+  // Media
+  mp3: "audio/mpeg",
+  wav: "audio/wav",
+  ogg: "audio/ogg",
+  aac: "audio/aac",
+  mp4: "video/mp4",
+  webm: "video/webm",
+  avi: "video/x-msvideo",
+};
+
+export type FileSourceType = "bytes" | "url";
+
+export type FileBytes = {
+  type: Extract<FileSourceType, "bytes">;
+  bytes: string;
+  width?: number;
+  height?: number;
+};
+export type FileUrl = {
+  type: Extract<FileSourceType, "url">;
+  url: string | URL;
+};
+
+export type RTVIFile = {
+  name?: string;
+  // RTVI definition takes the Mime type here, but in client-js, we support
+  // clients providing shorthands defined above and we map them to Mime types
+  format: string;
+  source: FileBytes | FileUrl;
+};
+
+export type SendFileOptions = {
+  run_immediately?: boolean;
+  audio_response?: boolean;
+  // for things like 'detail' in openAI or 'citations' in Bedrock
+  custom_options?: { [key: number | string]: Serializable };
+};
+
+export type FileSupport = {
+  formats: string[];
+  sources: FileSourceType[];
+  maxSize: number; // bytes
+};
+
 /** DEPRECATED */
 export type LLMContextMessage = {
   role: "user" | "assistant";