feat(gemini): convert OpenAI SSE to Gemini SSE format in /v1beta/models route (#225)

* fix(translator): filter nameless hosted tools when converting Responses API to Chat format

Codex CLI sends "hosted" tools (e.g. `request_user_input`) via the OpenAI
Responses API. These tools have no explicit `name` field. The previous
`body.tools.map()` pass propagated `name: undefined` into the resulting
Chat Completions function declarations, which then became anonymous
`functionDeclarations` after the OpenAI→Gemini translation step.

Gemini strictly requires every function declaration to have a valid name
and rejects the entire request with:

  GenerateContentRequest.tools[0].function_declarations[4].name:
  Invalid function name. Must start with a letter or an underscore.

Fix: filter out any Responses API tool that lacks a non-empty `name`
string before converting to `{ type: "function", function: { name, ... } }`.
Named function tools are unaffected; only unnamed hosted tools are skipped.
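
A minimal sketch of the guard (tool shape per the OpenAI Responses API, where
function tools carry a flat top-level `name`; variable names here are
illustrative, not the actual translator source):

  // Keep only tools with a usable name; nameless hosted tools are dropped.
  const chatTools = (body.tools || [])
    .filter((t) => typeof t.name === "string" && t.name.length > 0)
    .map((t) => ({
      type: "function",
      function: {
        name: t.name,
        description: t.description,
        parameters: t.parameters,
      },
    }));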

Fixes: Gemini 400 error when Codex CLI is routed through 9router.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* feat(gemini): convert OpenAI SSE to Gemini SSE format in /v1beta/models route

The @google/genai SDK always uses :streamGenerateContent?alt=sse for chat
and expects Gemini SSE chunk format. The upstream handleChat returns OpenAI
SSE format, causing the SDK to crash on the [DONE] sentinel.

Changes:
- Add transformOpenAISSEToGeminiSSE() using TransformStream that converts
  each OpenAI SSE chunk (choices[0].delta) to Gemini SSE format
  (candidates[0].content.parts) on the fly
- Drop the OpenAI [DONE] sentinel (Gemini SSE ends by stream close)
- Map finish_reason -> finishReason, attach usageMetadata on final chunk
- Support reasoning_content -> thought: true parts for thinking models
- Refactor finishReasonMap to shared FINISH_REASON_MAP constant
- Fix streaming dispatch: stream=true now calls transformOpenAISSEToGeminiSSE
  instead of passing OpenAI SSE through raw

Fixes: SyntaxError: "[DONE]" is not valid JSON in Gemini CLI
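
A minimal repro of the failure mode, assuming the consumer JSON-parses every
`data:` payload, as a Gemini SSE client must:

  // OpenAI SSE terminates with a sentinel that is not JSON:
  //   data: [DONE]
  JSON.parse('{"candidates":[]}'); // fine
  JSON.parse("[DONE]");            // throws the SyntaxError quoted above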

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
Commit 0c832c9c59 (parent 4903a9b2cb), authored by Владимир Акимов, committed by GitHub, 2026-03-02 05:36:08 +03:00

@@ -28,34 +28,59 @@ export async function OPTIONS() {
}
/**
- * POST /v1beta/models/{model}:generateContent - Gemini compatible endpoint
- * Converts Gemini format to internal format and handles via handleChat
+ * POST /v1beta/models/{model}:generateContent — non-streaming
+ * POST /v1beta/models/{model}:streamGenerateContent — streaming (SSE)
+ *
+ * Streaming intent is determined by the URL action suffix (canonical Gemini API
+ * convention), NOT by a body field. generationConfig.stream is not a real
+ * Gemini API field and Gemini CLI never sets it.
+ *
+ * The @google/genai SDK always uses :streamGenerateContent?alt=sse for chat.
+ * The upstream handleChat returns OpenAI SSE format; we transform it to
+ * Gemini SSE format on the fly via transformOpenAISSEToGeminiSSE().
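+ *
+ * Examples (paths as parsed below; "myprovider" is illustrative):
+ *   /v1beta/models/gemini-pro:generateContent                  -> stream: false
+ *   /v1beta/models/myprovider/gemini-pro:streamGenerateContent -> stream: true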
*/
export async function POST(request, { params }) {
await ensureInitialized();
try {
const { path } = await params;
-// path = ["provider", "model:generateContent"] or ["model:generateContent"]
+// path = ["provider", "model:action"] or ["model:action"]
let model;
+let action; // ":generateContent" | ":streamGenerateContent"
if (path.length >= 2) {
// Format: /v1beta/models/provider/model:generateContent
const provider = path[0];
const modelAction = path[1];
-const modelName = modelAction.replace(":generateContent", "").replace(":streamGenerateContent", "");
-model = `${provider}/${modelName}`;
+action = modelAction.includes(":streamGenerateContent")
+  ? ":streamGenerateContent"
+  : ":generateContent";
+const modelName = modelAction
+  .replace(":streamGenerateContent", "")
+  .replace(":generateContent", "");
+model = provider + "/" + modelName;
} else {
// Format: /v1beta/models/model:generateContent
const modelAction = path[0];
-model = modelAction.replace(":generateContent", "").replace(":streamGenerateContent", "");
+action = modelAction.includes(":streamGenerateContent")
+  ? ":streamGenerateContent"
+  : ":generateContent";
+model = modelAction
+  .replace(":streamGenerateContent", "")
+  .replace(":generateContent", "");
}
const body = await request.json();
-// Convert Gemini format to OpenAI/internal format
-const convertedBody = convertGeminiToInternal(body, model);
+// Streaming is determined by URL action suffix:
+//   :streamGenerateContent => stream: true (SSE)
+//   :generateContent       => stream: false (plain JSON)
+const stream = action === ":streamGenerateContent";
+// Convert Gemini request format to OpenAI/internal format
+const convertedBody = convertGeminiToInternal(body, model, stream);
// Create new request with converted body
const newRequest = new Request(request.url, {
method: "POST",
@@ -63,7 +88,17 @@ export async function POST(request, { params }) {
body: JSON.stringify(convertedBody),
});
-return await handleChat(newRequest);
+const response = await handleChat(newRequest);
+if (stream) {
+  // Transform OpenAI SSE => Gemini SSE on the fly.
+  // The @google/genai SDK always uses :streamGenerateContent?alt=sse and
+  // expects Gemini SSE chunks (no [DONE] sentinel — stream just closes).
+  return transformOpenAISSEToGeminiSSE(response, model);
+} else {
+  // Convert OpenAI JSON response => Gemini GenerateContentResponse
+  return await convertOpenAIResponseToGemini(response, model);
+}
} catch (error) {
console.log("Error handling Gemini request:", error);
return Response.json(
@@ -74,9 +109,13 @@ export async function POST(request, { params }) {
}
/**
- * Convert Gemini request format to internal format
+ * Convert Gemini request format to OpenAI/internal format.
+ *
+ * @param {object} geminiBody - parsed Gemini request body
+ * @param {string} model - resolved model string (e.g. "gemini-pro-high")
+ * @param {boolean} stream - whether to stream (from URL action)
*/
-function convertGeminiToInternal(geminiBody, model) {
+function convertGeminiToInternal(geminiBody, model, stream) {
const messages = [];
// Convert system instruction
@@ -98,9 +137,6 @@ function convertGeminiToInternal(geminiBody, model) {
}
}
-// Determine if streaming
-const stream = geminiBody.generationConfig?.stream !== false;
return {
model,
messages,
@@ -111,3 +147,183 @@ function convertGeminiToInternal(geminiBody, model) {
};
}
/** Map OpenAI finish_reason => Gemini finishReason */
const FINISH_REASON_MAP = {
stop: "STOP",
length: "MAX_TOKENS",
tool_calls: "STOP",
content_filter: "SAFETY",
};
/**
* Transform an OpenAI SSE stream into a Gemini SSE stream.
*
* OpenAI SSE format (what handleChat returns):
* data: {"choices":[{"delta":{"content":"Hi"},"finish_reason":null}]}
* data: {"choices":[{"delta":{},"finish_reason":"stop"}],"usage":{...}}
* data: [DONE]
*
* Gemini SSE format (what @google/genai SDK expects):
* data: {"candidates":[{"content":{"role":"model","parts":[{"text":"Hi"}]},"index":0}]}
* data: {"candidates":[{"content":{"role":"model","parts":[{"text":""}]},"finishReason":"STOP","index":0}],"usageMetadata":{...}}
* (stream closes — no [DONE])
*/
function transformOpenAISSEToGeminiSSE(upstreamResponse, model) {
if (!upstreamResponse.ok || !upstreamResponse.body) {
return upstreamResponse;
}
const decoder = new TextDecoder();
const encoder = new TextEncoder();
let buffer = ""; // carries a partial SSE line across transport chunks
const transformStream = new TransformStream({
transform(chunk, controller) {
// An SSE event may be split across transport chunks; buffer the tail so
// JSON.parse below never sees a truncated payload.
buffer += decoder.decode(chunk, { stream: true });
const lines = buffer.split("\n");
buffer = lines.pop() ?? "";
for (const line of lines) {
if (!line.startsWith("data:")) continue;
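// slice(5) drops the "data:" prefix; trim() tolerates both "data:" and
// "data: " forms and strips any trailing \r from CRLF-delimited streams.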
const data = line.slice(5).trim();
// Drop empty lines and the OpenAI [DONE] sentinel.
// Gemini SSE ends by stream close, no sentinel needed.
if (!data || data === "[DONE]") continue;
let parsed;
try {
parsed = JSON.parse(data);
} catch {
continue;
}
const choice = parsed.choices?.[0];
if (!choice) continue;
const delta = choice.delta || {};
const parts = [];
if (delta.reasoning_content) {
parts.push({ text: delta.reasoning_content, thought: true });
}
if (delta.content) {
parts.push({ text: delta.content });
}
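// Note: delta.tool_calls are not translated to functionCall parts; a
// tool-call-only delta has no parts and is skipped by the guard below.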
// Skip pure role-only deltas with no content and no finish signal
if (parts.length === 0 && !choice.finish_reason) continue;
const candidate = {
content: {
role: "model",
parts: parts.length > 0 ? parts : [{ text: "" }],
},
index: 0,
};
if (choice.finish_reason) {
candidate.finishReason = FINISH_REASON_MAP[choice.finish_reason] || "STOP";
}
const geminiChunk = { candidates: [candidate] };
// Attach usage + modelVersion on the final chunk (when finish_reason is set)
if (choice.finish_reason && parsed.usage) {
geminiChunk.usageMetadata = {
promptTokenCount: parsed.usage.prompt_tokens || 0,
candidatesTokenCount: parsed.usage.completion_tokens || 0,
totalTokenCount: parsed.usage.total_tokens || 0,
};
const reasoningTokens =
parsed.usage.completion_tokens_details?.reasoning_tokens;
if (reasoningTokens) {
geminiChunk.usageMetadata.thoughtsTokenCount = reasoningTokens;
}
geminiChunk.modelVersion = parsed.model || model;
}
controller.enqueue(
encoder.encode("data: " + JSON.stringify(geminiChunk) + "\r\n\r\n")
);
}
},
// No flush() needed: Gemini SSE ends by stream close, not a sentinel.
// Anything left in `buffer` at close would be an incomplete event anyway.
});
return new Response(upstreamResponse.body.pipeThrough(transformStream), {
status: 200,
headers: {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
"Access-Control-Allow-Origin": "*",
},
});
}
/**
* Convert an OpenAI chat.completion JSON response into a Gemini
* GenerateContentResponse so that Gemini CLI can parse it.
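*
* Example:
*   in:  {"choices":[{"message":{"content":"Hi"},"finish_reason":"stop"}],"usage":{...}}
*   out: {"candidates":[{"content":{"role":"model","parts":[{"text":"Hi"}]},"finishReason":"STOP","index":0}],"usageMetadata":{...}}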
*/
async function convertOpenAIResponseToGemini(response, model) {
if (!response.ok) return response;
// Read as text first: response.json() would consume and lock the body,
// making the original response unusable if parsing fails.
const raw = await response.text();
let body;
try {
body = JSON.parse(raw);
} catch {
return new Response(raw || null, { status: response.status, headers: response.headers });
}
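// body.candidates => already Gemini-shaped; pass through unchanged.
// body.error      => propagate the upstream error with its original status.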
if (body.candidates) return Response.json(body, {
headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" }
});
if (body.error) return Response.json(body, {
status: response.status,
headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" }
});
const choice = body.choices?.[0];
if (!choice) {
return Response.json(body, {
headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" }
});
}
const { message, finish_reason } = choice;
const parts = [];
if (message.reasoning_content) {
parts.push({ text: message.reasoning_content, thought: true });
}
parts.push({ text: message.content || "" });
const finishReason = FINISH_REASON_MAP[finish_reason] || "STOP";
const geminiResponse = {
candidates: [
{
content: { role: "model", parts },
finishReason,
index: 0,
},
],
modelVersion: body.model || model,
};
if (body.usage) {
geminiResponse.usageMetadata = {
promptTokenCount: body.usage.prompt_tokens || 0,
candidatesTokenCount: body.usage.completion_tokens || 0,
totalTokenCount: body.usage.total_tokens || 0,
};
const reasoningTokens = body.usage.completion_tokens_details?.reasoning_tokens;
if (reasoningTokens) {
geminiResponse.usageMetadata.thoughtsTokenCount = reasoningTokens;
}
}
return Response.json(geminiResponse, {
headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*" }
});
}
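
For manual verification, a smoke-test sketch for the streaming path (the
localhost port and model name are assumptions; the URL shape follows this
route, and Node 18+ is assumed for global fetch and async-iterable streams):

  const res = await fetch(
    "http://localhost:3000/v1beta/models/gemini-pro:streamGenerateContent?alt=sse",
    {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        contents: [{ role: "user", parts: [{ text: "Hello" }] }],
      }),
    }
  );
  // Expect Gemini SSE chunks ("data: {...}") and a plain stream close, no [DONE].
  const decoder = new TextDecoder();
  for await (const chunk of res.body) {
    process.stdout.write(decoder.decode(chunk, { stream: true }));
  }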