genkit-ai · ssbushi · Feb 27, 2026 · Feb 20, 2026 · Feb 21, 2026 · Feb 24, 2026
diff --git a/js/plugins/compat-oai/src/audio.ts b/js/plugins/compat-oai/src/audio.ts
@@ -28,6 +28,8 @@ import type {
   SpeechCreateParams,
   Transcription,
   TranscriptionCreateParams,
+  TranslationCreateParams,
+  TranslationCreateResponse,
 } from 'openai/resources/audio/index.mjs';
 import { PluginOptions } from './index.js';
 import { maybeCreateRequestScopedOpenAIClient, toModelName } from './utils.js';
@@ -40,8 +42,12 @@ export type TranscriptionRequestBuilder = (
   req: GenerateRequest,
   params: TranscriptionCreateParams
 ) => void;
+/**
+ * Callback that lets a caller adjust the translation API parameters derived
+ * from a Genkit request. It receives the originating request together with
+ * the params object and returns nothing, so any customization is made by
+ * mutating `params` in place.
+ */
+export type TranslationRequestBuilder = (
+  req: GenerateRequest,
+  params: TranslationCreateParams
+) => void;
 
-export const TRANSCRIPTION_MODEL_INFO = {
+export const TRANSCRIPTION_MODEL_INFO: ModelInfo = {
   supports: {
     media: true,
     output: ['text', 'json'],
@@ -61,6 +67,16 @@ export const SPEECH_MODEL_INFO: ModelInfo = {
   },
 };
 
+/**
+ * Default capability metadata for translation models: media is marked as
+ * supported, output may be `text` or `json`, and multi-turn conversations,
+ * system-role messages and tools are all marked as unsupported.
+ */
+export const TRANSLATION_MODEL_INFO: ModelInfo = {
+  supports: {
+    media: true,
+    output: ['text', 'json'],
+    multiturn: false,
+    systemRole: false,
+    tools: false,
+  },
+};
+
 const ChunkingStrategySchema = z.object({
   type: z.string(),
   prefix_padding_ms: z.number().int().optional(),
@@ -92,6 +108,14 @@ export const SpeechConfigSchema = z.object({
     .optional(),
 });
 
+/**
+ * Config schema for translation models. Keeps only `temperature` from the
+ * common generation config and adds an optional `response_format` that must
+ * be one of `json`, `text`, `srt`, `verbose_json` or `vtt`.
+ */
+export const TranslationConfigSchema = GenerationCommonConfigSchema.pick({
+  temperature: true,
+}).extend({
+  response_format: z
+    .enum(['json', 'text', 'srt', 'verbose_json', 'vtt'])
+    .optional(),
+});
+
 /**
  * Supported media formats for Audio generation
  */
@@ -420,3 +444,171 @@ export function compatOaiTranscriptionModelRef<
     namespace,
   });
 }
+
+/**
+ * Builds the translation API parameters for a Genkit generate request.
+ *
+ * The audio is taken from the media part of the first message, which must be
+ * a data URL; everything after the first comma is decoded as base64. Any text
+ * in that message is forwarded as the `prompt`.
+ *
+ * @param modelName Model to call when the request config has no `version`.
+ * @param request The Genkit generate request to convert.
+ * @param requestBuilder Optional hook that may mutate the params. When it is
+ * supplied, the remaining request config is NOT passed through automatically.
+ * @returns The translation params with every `undefined` entry removed.
+ * @throws Error if the first message has no media, if the media is not a
+ * data URL, if the output format is `media`, or if a `json` output format is
+ * combined with a `response_format` other than `json` / `verbose_json`.
+ */
+function toTranslationRequest(
+  modelName: string,
+  request: GenerateRequest,
+  requestBuilder?: TranslationRequestBuilder
+): TranslationCreateParams {
+  const message = new Message(request.messages[0]);
+  const media = message.media;
+  if (!media?.url) {
+    throw new Error('No media found in the request');
+  }
+  // Only inline data URLs can be decoded locally. Anything else (for example
+  // an http(s) URL) would otherwise be base64-decoded verbatim and uploaded
+  // as garbage audio, so fail early with a clear message instead.
+  const payloadStart = media.url.indexOf(',');
+  if (!media.url.startsWith('data:') || payloadStart === -1) {
+    throw new Error(
+      'Media must be provided as a base64-encoded data URL (data:<mime>;base64,<data>)'
+    );
+  }
+  // The header sits between "data:" and the first comma. Its MIME type ends
+  // at the first ";" when parameters such as ";base64" follow it; searching
+  // only the header keeps a ";" inside the payload from being mistaken for it.
+  const header = media.url.slice('data:'.length, payloadStart);
+  const headerMimeType = header.split(';')[0];
+  const mediaBuffer = Buffer.from(media.url.slice(payloadStart + 1), 'base64');
+  const mediaFile = new File([mediaBuffer], 'input', {
+    type: media.contentType ?? headerMimeType,
+  });
+  // The common generation options named below are extracted only so that
+  // they are excluded from `restOfConfig`; they are not sent to the API.
+  const {
+    temperature,
+    version: modelVersion,
+    maxOutputTokens,
+    stopSequences,
+    topK,
+    topP,
+    ...restOfConfig
+  } = request.config ?? {};
+
+  let options: TranslationCreateParams = {
+    model: modelVersion ?? modelName,
+    file: mediaFile,
+    prompt: message.text,
+    temperature,
+  };
+  if (requestBuilder) {
+    requestBuilder(request, options);
+  } else {
+    options = {
+      ...options,
+      ...restOfConfig, // passthrough rest of the config
+    };
+  }
+  const outputFormat = request.output?.format as 'json' | 'text' | 'media';
+  const customFormat = request.config?.response_format;
+  if (outputFormat && customFormat) {
+    if (
+      outputFormat === 'json' &&
+      customFormat !== 'json' &&
+      customFormat !== 'verbose_json'
+    ) {
+      throw new Error(
+        `Custom response format ${customFormat} is not compatible with output format ${outputFormat}`
+      );
+    }
+  }
+  if (outputFormat === 'media') {
+    throw new Error(`Output format ${outputFormat} is not supported.`);
+  }
+  // Precedence: explicit config, then the request's output format, then any
+  // format the request builder chose, and finally plain text. Previously a
+  // builder-provided format was always overwritten.
+  options.response_format =
+    customFormat || outputFormat || options.response_format || 'text';
+  // Strip undefined entries so they are not serialized into the request.
+  for (const k in options) {
+    if (options[k] === undefined) {
+      delete options[k];
+    }
+  }
+  return options;
+}
+
+/**
+ * Wraps a translation API result in a Genkit generate response.
+ *
+ * @param result Either a plain string or a response object whose `text`
+ * field holds the translation.
+ * @returns A single-part `model` message carrying the translated text, a
+ * `stop` finish reason, and the untouched result under `raw`.
+ */
+function translationToGenerateResponse(
+  result: TranslationCreateResponse | string
+): GenerateResponseData {
+  let translatedText: string;
+  if (typeof result === 'string') {
+    translatedText = result;
+  } else {
+    translatedText = result.text;
+  }
+  const response: GenerateResponseData = {
+    message: { role: 'model', content: [{ text: translatedText }] },
+    finishReason: 'stop',
+    raw: result,
+  };
+  return response;
+}
+
+/**
+ * Defines a Genkit model backed by an OpenAI-compatible audio translation
+ * endpoint (`client.audio.translations.create`).
+ *
+ * These models are to be used to translate audio to text.
+ *
+ * @param params Options for defining the translation model.
+ * @param params.name The name of the model.
+ * @param params.client The OpenAI client instance, passed as the default
+ * client to `maybeCreateRequestScopedOpenAIClient` on every request.
+ * @param params.pluginOptions Optional plugin options; its `name` prefixes
+ * the action name, falling back to `compat-oai`.
+ * @param params.modelRef Optional reference supplying the model info and
+ * config schema.
+ * @param params.requestBuilder Optional hook for customizing the translation
+ * params before they are sent.
+ *
+ * @returns the created {@link ModelAction}
+ */
+export function defineCompatOpenAITranslationModel<
+  CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,
+>(params: {
+  name: string;
+  client: OpenAI;
+  pluginOptions?: PluginOptions;
+  modelRef?: ModelReference<CustomOptions>;
+  requestBuilder?: TranslationRequestBuilder;
+}) {
+  const fallbackClient = params.client;
+  const pluginOptions = params.pluginOptions;
+  const reference = params.modelRef;
+  const buildRequest = params.requestBuilder;
+
+  const modelName = toModelName(params.name, pluginOptions?.name);
+  const actionNamespace = pluginOptions?.name ?? 'compat-oai';
+
+  return model(
+    {
+      name: `${actionNamespace}/${modelName}`,
+      ...reference?.info,
+      configSchema: reference?.configSchema,
+    },
+    async (request, { abortSignal }) => {
+      // Convert first so an invalid request fails before a client is created.
+      const translationParams = toTranslationRequest(
+        modelName,
+        request,
+        buildRequest
+      );
+      const requestClient = maybeCreateRequestScopedOpenAIClient(
+        pluginOptions,
+        request,
+        fallbackClient
+      );
+      return translationToGenerateResponse(
+        await requestClient.audio.translations.create(translationParams, {
+          signal: abortSignal,
+        })
+      );
+    }
+  );
+}
+
+/**
+ * Creates a translation model reference for OpenAI-compatible providers.
+ *
+ * When `info` is omitted, {@link TRANSLATION_MODEL_INFO} is used; when
+ * `configSchema` is falsy, {@link TranslationConfigSchema} is used instead.
+ * `name`, `config` and `namespace` are forwarded unchanged.
+ */
+export function compatOaiTranslationModelRef<
+  CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,
+>(params: {
+  name: string;
+  info?: ModelInfo;
+  configSchema?: CustomOptions;
+  config?: any;
+  namespace?: string;
+}) {
+  return modelRef({
+    name: params.name,
+    configSchema: params.configSchema || (TranslationConfigSchema as any),
+    // Fall back only when `info` is undefined, matching a parameter default.
+    info: params.info === undefined ? TRANSLATION_MODEL_INFO : params.info,
+    config: params.config,
+    namespace: params.namespace,
+  });
+}
diff --git a/js/plugins/compat-oai/src/index.ts b/js/plugins/compat-oai/src/index.ts
@@ -24,12 +24,16 @@ import { toModelName } from './utils.js';
 export {
   SpeechConfigSchema,
   TranscriptionConfigSchema,
+  TranslationConfigSchema,
   compatOaiSpeechModelRef,
   compatOaiTranscriptionModelRef,
+  compatOaiTranslationModelRef,
   defineCompatOpenAISpeechModel,
   defineCompatOpenAITranscriptionModel,
+  defineCompatOpenAITranslationModel,
   type SpeechRequestBuilder,
   type TranscriptionRequestBuilder,
+  type TranslationRequestBuilder,
 } from './audio.js';
 export { defineCompatOpenAIEmbedder } from './embedder.js';
 export {

diff --git a/js/plugins/compat-oai/src/openai/index.ts b/js/plugins/compat-oai/src/openai/index.ts
@@ -30,8 +30,10 @@ import OpenAI from 'openai';
 import {
   defineCompatOpenAISpeechModel,
   defineCompatOpenAITranscriptionModel,
+  defineCompatOpenAITranslationModel,
   SpeechConfigSchema,
   TranscriptionConfigSchema,
+  TranslationConfigSchema,
 } from '../audio.js';
 import { defineCompatOpenAIEmbedder } from '../embedder.js';
 import {
@@ -55,6 +57,10 @@ import {
   SUPPORTED_GPT_MODELS,
 } from './gpt.js';
 import { openAITranscriptionModelRef, SUPPORTED_STT_MODELS } from './stt.js';
+import {
+  openAITranslationModelRef,
+  SUPPORTED_TRANSLATION_MODELS,
+} from './translation.js';
 import { openAISpeechModelRef, SUPPORTED_TTS_MODELS } from './tts.js';
 
 export type OpenAIPluginOptions = Omit<PluginOptions, 'name' | 'baseURL'>;
@@ -88,6 +94,19 @@ function createResolver(pluginOptions: PluginOptions) {
         pluginOptions,
         modelRef,
       });
+    } else if (actionName.includes('translate')) {
+      const modelRef = openAITranslationModelRef({ name: actionName });
+      return defineCompatOpenAITranslationModel({
+        name: modelRef.name,
+        client,
+        pluginOptions,
+        modelRef,
+        requestBuilder: (req, params) => {
+          if (modelRef.name.endsWith('whisper-1-translate')) {
+            params.model = 'whisper-1';
+          }
+        },
+      });
     } else if (
       actionName.includes('whisper') ||
       actionName.includes('transcribe')
@@ -147,6 +166,15 @@ const listActions = async (client: OpenAI): Promise<ActionMetadata[]> => {
           info: modelRef.info,
           configSchema: modelRef.configSchema,
         });
+      } else if (model.id.includes('translate')) {
+        const modelRef =
+          SUPPORTED_TRANSLATION_MODELS[model.id] ??
+          openAITranslationModelRef({ name: model.id });
+        return modelActionMetadata({
+          name: modelRef.name,
+          info: modelRef.info,
+          configSchema: modelRef.configSchema,
+        });
       } else if (
         model.id.includes('whisper') ||
         model.id.includes('transcribe')
@@ -209,6 +237,21 @@ export function openAIPlugin(options?: OpenAIPluginOptions): GenkitPluginV2 {
           })
         )
       );
+      models.push(
+        ...Object.values(SUPPORTED_TRANSLATION_MODELS).map((modelRef) =>
+          defineCompatOpenAITranslationModel({
+            name: modelRef.name,
+            client,
+            pluginOptions,
+            modelRef,
+            requestBuilder: (req, params) => {
+              if (modelRef.name.endsWith('whisper-1-translate')) {
+                params.model = 'whisper-1';
+              }
+            },
+          })
+        )
+      );
       models.push(
         ...Object.values(SUPPORTED_STT_MODELS).map((modelRef) =>
           defineCompatOpenAITranscriptionModel({
@@ -255,6 +298,13 @@ export type OpenAIPlugin = {
       | (`${string}-tts` & {}),
     config?: z.infer<typeof SpeechConfigSchema>
   ): ModelReference<typeof SpeechConfigSchema>;
+  model(
+    name:
+      | keyof typeof SUPPORTED_TRANSLATION_MODELS
+      | (`whisper-${string}-translate` & {})
+      | (`${string}-translate` & {}),
+    config?: z.infer<typeof TranslationConfigSchema>
+  ): ModelReference<typeof TranslationConfigSchema>;
   model(
     name:
       | keyof typeof SUPPORTED_STT_MODELS
@@ -292,6 +342,12 @@ const model = ((name: string, config?: any): ModelReference<z.ZodTypeAny> => {
       config,
     });
   }
+  if (name.includes('translate')) {
+    return openAITranslationModelRef({
+      name,
+      config,
+    });
+  }
   if (name.includes('whisper') || name.includes('transcribe')) {
     return openAITranscriptionModelRef({
       name,

diff --git a/js/plugins/compat-oai/src/openai/translation.ts b/js/plugins/compat-oai/src/openai/translation.ts
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2024 The Fire Company
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { z } from 'genkit';
+import { ModelInfo } from 'genkit/model';
+import { compatOaiTranslationModelRef } from '../audio';
+
+/**
+ * OpenAI translation ModelRef helper. Forwards the given params to
+ * `compatOaiTranslationModelRef` with the namespace fixed to `openai`.
+ */
+export function openAITranslationModelRef<
+  CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,
+>(params: {
+  name: string;
+  info?: ModelInfo;
+  configSchema?: CustomOptions;
+  config?: any;
+}) {
+  const namespacedParams = { ...params, namespace: 'openai' };
+  return compatOaiTranslationModelRef(namespacedParams);
+}
+
+/**
+ * Translation model references exposed by the OpenAI plugin, keyed by the
+ * model name used on the Genkit side.
+ */
+export const SUPPORTED_TRANSLATION_MODELS = {
+  /**
+   * Whisper 1 translation model.
+   *
+   * The actual OpenAI model ID is 'whisper-1', but we use 'whisper-1-translate'
+   * to distinguish it from the 'whisper-1' transcription model. The model ID
+   * is overridden in index.ts to 'whisper-1' when calling the OpenAI API.
+   */
+  'whisper-1-translate': openAITranslationModelRef({
+    name: 'whisper-1-translate',
+  }),
+};
diff --git a/js/testapps/compat-oai/audio-korean.mp3 b/js/testapps/compat-oai/audio-korean.mp3