From 7f93df3a927d2f9a694a618d44457fff97c4bc36 Mon Sep 17 00:00:00 2001 From: Muhammad Mugni Hadi Date: Thu, 7 May 2026 15:51:30 +0700 Subject: [PATCH] feat: add audio input support for Gemini translation (#913) Add input_audio and audio_url content type handlers to convertOpenAIContentToParts() in geminiHelper.js, converting OpenAI audio format to Gemini inlineData format. Also add audio types to VALID_OPENAI_CONTENT_TYPES in openaiHelper.js so they are not stripped by filterToOpenAIFormat(). Fixes #912 --- open-sse/translator/helpers/geminiHelper.js | 17 +++++++++++++++++ open-sse/translator/helpers/openaiHelper.js | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/open-sse/translator/helpers/geminiHelper.js b/open-sse/translator/helpers/geminiHelper.js index 47fb9f94..b39b3f3a 100644 --- a/open-sse/translator/helpers/geminiHelper.js +++ b/open-sse/translator/helpers/geminiHelper.js @@ -57,6 +57,23 @@ export function convertOpenAIContentToParts(content) { parts.push({ fileData: { fileUri: item.image_url.url, mimeType: "image/*" } }); + } else if (item.type === "input_audio" && item.input_audio?.data) { + const format = item.input_audio.format || "wav"; + const mimeType = format === "mp3" ? "audio/mpeg" : `audio/${format}`; + parts.push({ + inlineData: { mime_type: mimeType, data: item.input_audio.data } + }); + } else if (item.type === "audio_url" && item.audio_url?.url?.startsWith("data:")) { + const url = item.audio_url.url; + const commaIndex = url.indexOf(","); + if (commaIndex !== -1) { + const mimePart = url.substring(5, commaIndex); + const data = url.substring(commaIndex + 1); + const mimeType = mimePart.split(";")[0]; + parts.push({ + inlineData: { mime_type: mimeType, data: data } + }); + } } } } diff --git a/open-sse/translator/helpers/openaiHelper.js b/open-sse/translator/helpers/openaiHelper.js index 18723930..0577cfc5 100644 --- a/open-sse/translator/helpers/openaiHelper.js +++ b/open-sse/translator/helpers/openaiHelper.js @@ -1,7 +1,7 @@ // OpenAI helper functions for translator // Valid OpenAI content block types -export const VALID_OPENAI_CONTENT_TYPES = ["text", "image_url", "image"]; +export const VALID_OPENAI_CONTENT_TYPES = ["text", "image_url", "image", "input_audio", "audio_url"]; export const VALID_OPENAI_MESSAGE_TYPES = ["text", "image_url", "image", "tool_calls", "tool_result"]; // Filter messages to OpenAI standard format