Mirror of https://github.com/decolua/9router.git (synced 2026-05-08 12:01:28 +00:00)
feat: add Gemini embeddings support + Letta compatibility fixes
Cherry-picked from decolua/9router#148 (author: xuandung38 / Hồ Xuân Dũng <me@hxd.vn>)

- Add Google AI (Gemini) embeddings support for the /v1/embeddings endpoint
- Add Gemini embedding models: gemini-embedding-001, text-embedding-005, text-embedding-004
- Inject missing object/created fields for Letta and other strict OpenAI clients
- Strip Azure-specific fields (prompt_filter_results, content_filter_results) from responses
- Fix Dockerfile: copy the open-sse directory into the Docker runner stage

Skipped: whitelist message field stripping (commits 3/7/8) — too aggressive for all providers
Skipped: default stream=false change (commit 9) — behavior change needs further review

Co-authored-by: Cursor <cursoragent@cursor.com>
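For orientation, a minimal client-side sketch of what the new embeddings path accepts. The endpoint path and model name come from this commit; the base URL, port (taken from the Dockerfile's EXPOSE 20128), and the router key are assumptions for illustration only.

// Hedged example: not part of the commit; base URL and auth value are placeholders.
const response = await fetch("http://localhost:20128/v1/embeddings", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "Authorization": "Bearer <router-api-key>"   // placeholder credential
  },
  body: JSON.stringify({
    model: "gemini-embedding-001",               // one of the new Gemini embedding models
    input: ["first text", "second text"]         // string or array, as with OpenAI
  })
});
const result = await response.json();            // normalized to the OpenAI "list" shape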
@@ -22,6 +22,7 @@ RUN mkdir -p /app/data

COPY --from=builder /app/public ./public
COPY --from=builder /app/.next/static ./.next/static
COPY --from=builder /app/.next/standalone ./
COPY --from=builder /app/open-sse ./open-sse

EXPOSE 20128
@@ -135,6 +135,10 @@ export const PROVIDER_MODELS = {
    { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
    { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
    { id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" },
    // Embedding models
    { id: "gemini-embedding-001", name: "Gemini Embedding 001", type: "embedding" },
    { id: "text-embedding-005", name: "Text Embedding 005", type: "embedding" },
    { id: "text-embedding-004", name: "Text Embedding 004 (Legacy)", type: "embedding" },
  ],
  openrouter: [
    { id: "auto", name: "Auto (Best Available)" },
@@ -760,6 +760,22 @@ export async function handleChatCore({ body, modelInfo, credentials, log, onCred
      ? translateNonStreamingResponse(responseBody, targetFormat, sourceFormat)
      : responseBody;

    // Ensure OpenAI-required fields are present (needed for Letta and other strict clients)
    if (!translatedResponse.object) translatedResponse.object = "chat.completion";
    if (!translatedResponse.created) translatedResponse.created = Math.floor(Date.now() / 1000);

    // Strip Azure-specific non-standard fields
    if (translatedResponse.prompt_filter_results !== undefined) {
      delete translatedResponse.prompt_filter_results;
    }
    if (translatedResponse?.choices) {
      for (const choice of translatedResponse.choices) {
        if (choice.content_filter_results !== undefined) {
          delete choice.content_filter_results;
        }
      }
    }

    // Add buffer and filter usage for client (to prevent CLI context errors)
    if (translatedResponse?.usage) {
      const buffered = addBufferToUsage(translatedResponse.usage);
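To make the effect of the block above concrete, a small before/after sketch. The payload is invented; only the field names and behavior come from the diff.

// Hypothetical Azure-style non-streaming response before the fix-ups
const translatedResponse = {
  id: "chatcmpl-123",
  choices: [
    { index: 0, message: { role: "assistant", content: "Hi" }, content_filter_results: {} }
  ],
  prompt_filter_results: [],
  usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 }
};

// After the injection/stripping logic above:
//   translatedResponse.object  === "chat.completion"      (injected)
//   translatedResponse.created === <unix seconds>         (injected)
//   "prompt_filter_results" is no longer present          (stripped)
//   choices[0].content_filter_results is no longer present (stripped)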
@@ -4,16 +4,49 @@ import { HTTP_STATUS } from "../config/constants.js";
import { getExecutor } from "../executors/index.js";
import { refreshWithRetry } from "../services/tokenRefresh.js";

// Google AI (Gemini) provider aliases / identifiers
const GEMINI_PROVIDERS = new Set(["gemini", "google_ai_studio"]);

/**
 * Check whether a provider targets the Google AI (Gemini) embeddings API.
 * @param {string} provider
 */
function isGeminiProvider(provider) {
  return GEMINI_PROVIDERS.has(provider);
}

/**
 * Build the embeddings request body for the target provider.
 * Most OpenAI-compatible providers accept the same format.
 * For providers that don't support embeddings natively (chat-only), we return an error.
 *
 * - OpenAI / openai-compatible / openrouter: standard { model, input } format.
 * - Google AI (Gemini): different format per API spec.
 *   - Single input → embedContent body: { model, content: { parts: [{ text }] } }
 *   - Batch input → batchEmbedContents body: { requests: [{ model, content: { parts: [{ text }] } }] }
 */
function buildEmbeddingsBody(model, input, encodingFormat) {
  const body = {
    model,
    input
  };
function buildEmbeddingsBody(provider, model, input, encodingFormat) {
  if (isGeminiProvider(provider)) {
    // Normalize model name: Gemini API expects "models/<model>" prefix
    const geminiModel = model.startsWith("models/") ? model : `models/${model}`;

    if (Array.isArray(input)) {
      // Batch request
      return {
        requests: input.map((text) => ({
          model: geminiModel,
          content: { parts: [{ text: String(text) }] }
        }))
      };
    } else {
      // Single request
      return {
        model: geminiModel,
        content: { parts: [{ text: String(input) }] }
      };
    }
  }

  // Default: OpenAI format
  const body = { model, input };
  if (encodingFormat) {
    body.encoding_format = encodingFormat;
  }
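For reference, a sketch of the bodies the new buildEmbeddingsBody would produce. Inputs are illustrative; the OpenAI model name in the last call is a placeholder, not something added by this commit.

buildEmbeddingsBody("gemini", "gemini-embedding-001", "hello", "float");
// → { model: "models/gemini-embedding-001", content: { parts: [{ text: "hello" }] } }

buildEmbeddingsBody("gemini", "gemini-embedding-001", ["a", "b"], "float");
// → { requests: [
//      { model: "models/gemini-embedding-001", content: { parts: [{ text: "a" }] } },
//      { model: "models/gemini-embedding-001", content: { parts: [{ text: "b" }] } }
//    ] }
// Note: encoding_format is ignored on the Gemini path; it only applies to the OpenAI-format fallback.

buildEmbeddingsBody("openai", "text-embedding-3-small", "hello", "float");
// → { model: "text-embedding-3-small", input: "hello", encoding_format: "float" }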
@@ -22,8 +55,24 @@ function buildEmbeddingsBody(model, input, encodingFormat) {

/**
 * Build the URL for the embeddings endpoint based on the provider.
 * @param {string} provider
 * @param {string} model
 * @param {object} credentials
 * @param {string|string[]} input - used to select single vs batch endpoint for Gemini
 */
function buildEmbeddingsUrl(provider, credentials) {
function buildEmbeddingsUrl(provider, model, credentials, input) {
  if (isGeminiProvider(provider)) {
    const apiKey = credentials.apiKey || credentials.accessToken;
    // Normalize model name for URL path
    const modelPath = model.startsWith("models/") ? model : `models/${model}`;

    if (Array.isArray(input)) {
      // batchEmbedContents for array input (keeps response format consistent even for length=1)
      return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:batchEmbedContents?key=${encodeURIComponent(apiKey)}`;
    }
    return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:embedContent?key=${encodeURIComponent(apiKey)}`;
  }

  switch (provider) {
    case "openai":
      return "https://api.openai.com/v1/embeddings";
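And the URLs the Gemini path would yield, with a fake key value used purely for illustration:

buildEmbeddingsUrl("gemini", "gemini-embedding-001", { apiKey: "AIzaFakeKey123" }, "hello");
// → "https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:embedContent?key=AIzaFakeKey123"

buildEmbeddingsUrl("gemini", "gemini-embedding-001", { apiKey: "AIzaFakeKey123" }, ["a", "b"]);
// → "https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:batchEmbedContents?key=AIzaFakeKey123"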
@@ -46,6 +95,11 @@ function buildEmbeddingsUrl(provider, credentials) {
function buildEmbeddingsHeaders(provider, credentials) {
  const headers = { "Content-Type": "application/json" };

  if (isGeminiProvider(provider)) {
    // Gemini API uses API key as query param — no Authorization header needed
    return headers;
  }

  switch (provider) {
    case "openai":
    case "openrouter":
@@ -56,11 +110,7 @@ function buildEmbeddingsHeaders(provider, credentials) {
      }
      break;
    default:
      if (provider?.startsWith?.("openai-compatible-")) {
        headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`;
      } else {
        headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`;
      }
      headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`;
  }

  return headers;
@@ -68,14 +118,52 @@ function buildEmbeddingsHeaders(provider, credentials) {

/**
 * Normalize the embeddings response to OpenAI format.
 * Most OpenAI-compatible providers already return this format.
 *
 * Gemini single response:
 *   { embedding: { values: [0.1, 0.2, ...] } }
 *
 * Gemini batch response:
 *   { embeddings: [{ values: [...] }, ...] }
 *
 * Target OpenAI format:
 *   { object: "list", data: [{ object: "embedding", index: 0, embedding: [...] }], model, usage: {...} }
 */
function normalizeEmbeddingsResponse(responseBody, model) {
function normalizeEmbeddingsResponse(responseBody, model, provider) {
  // Already in OpenAI format
  if (responseBody.object === "list" && Array.isArray(responseBody.data)) {
    return responseBody;
  }

  if (isGeminiProvider(provider)) {
    let embeddingItems = [];

    if (Array.isArray(responseBody.embeddings)) {
      // Batch response
      embeddingItems = responseBody.embeddings.map((emb, idx) => ({
        object: "embedding",
        index: idx,
        embedding: emb.values || []
      }));
    } else if (responseBody.embedding?.values) {
      // Single response
      embeddingItems = [{
        object: "embedding",
        index: 0,
        embedding: responseBody.embedding.values
      }];
    }

    return {
      object: "list",
      data: embeddingItems,
      model,
      usage: {
        prompt_tokens: 0,
        total_tokens: 0
      }
    };
  }

  // Try to handle alternate formats gracefully
  return responseBody;
}
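A quick sketch of the normalization for a Gemini batch response; the vector values are invented.

normalizeEmbeddingsResponse(
  { embeddings: [{ values: [0.1, 0.2] }, { values: [0.3, 0.4] }] },  // raw Gemini batch body
  "gemini-embedding-001",
  "gemini"
);
// → {
//     object: "list",
//     data: [
//       { object: "embedding", index: 0, embedding: [0.1, 0.2] },
//       { object: "embedding", index: 1, embedding: [0.3, 0.4] }
//     ],
//     model: "gemini-embedding-001",
//     usage: { prompt_tokens: 0, total_tokens: 0 }   // zero-filled; usage is not mapped from Gemini
//   }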
@@ -114,16 +202,16 @@ export async function handleEmbeddingsCore({
  const encodingFormat = body.encoding_format || "float";

  // Determine embeddings URL
  const url = buildEmbeddingsUrl(provider, credentials);
  const url = buildEmbeddingsUrl(provider, model, credentials, input);
  if (!url) {
    return createErrorResult(
      HTTP_STATUS.BAD_REQUEST,
      `Provider '${provider}' does not support embeddings. Use openai, openrouter, or an openai-compatible provider.`
      `Provider '${provider}' does not support embeddings. Use openai, openrouter, gemini, or an openai-compatible provider.`
    );
  }

  const headers = buildEmbeddingsHeaders(provider, credentials);
  const requestBody = buildEmbeddingsBody(model, input, encodingFormat);
  const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat);

  log?.debug?.("EMBEDDINGS", `${provider.toUpperCase()} | ${model} | input_type=${Array.isArray(input) ? `array[${input.length}]` : "string"}`);
@@ -162,7 +250,12 @@ export async function handleEmbeddingsCore({
      // Retry with refreshed credentials
      try {
        const retryHeaders = buildEmbeddingsHeaders(provider, credentials);
        providerResponse = await fetch(url, {
        // Rebuild URL for Gemini since API key is embedded in query param
        const retryUrl = isGeminiProvider(provider)
          ? buildEmbeddingsUrl(provider, model, credentials, input)
          : url;

        providerResponse = await fetch(retryUrl, {
          method: "POST",
          headers: retryHeaders,
          body: JSON.stringify(requestBody)
@@ -193,7 +286,7 @@ export async function handleEmbeddingsCore({
      await onRequestSuccess();
    }

    const normalized = normalizeEmbeddingsResponse(responseBody, model);
    const normalized = normalizeEmbeddingsResponse(responseBody, model, provider);

    log?.debug?.("EMBEDDINGS", `Success | usage=${JSON.stringify(normalized.usage || {})}`);
@@ -82,6 +82,25 @@ export function createSSEStream(options = {}) {

        const idFixed = fixInvalidId(parsed);

        // Ensure OpenAI-required fields are present on streaming chunks (Letta compat)
        let fieldsInjected = false;
        if (!parsed.object) { parsed.object = "chat.completion.chunk"; fieldsInjected = true; }
        if (!parsed.created) { parsed.created = Math.floor(Date.now() / 1000); fieldsInjected = true; }

        // Strip Azure-specific non-standard fields from streaming chunks
        if (parsed.prompt_filter_results !== undefined) {
          delete parsed.prompt_filter_results;
          fieldsInjected = true;
        }
        if (parsed?.choices) {
          for (const choice of parsed.choices) {
            if (choice.content_filter_results !== undefined) {
              delete choice.content_filter_results;
              fieldsInjected = true;
            }
          }
        }

        if (!hasValuableContent(parsed, FORMATS.OPENAI)) {
          continue;
        }
@@ -115,7 +134,7 @@ export function createSSEStream(options = {}) {
          parsed.usage = filterUsageForFormat(buffered, FORMATS.OPENAI);
          output = `data: ${JSON.stringify(parsed)}\n`;
          injectedUsage = true;
        } else if (idFixed) {
        } else if (idFixed || fieldsInjected) {
          output = `data: ${JSON.stringify(parsed)}\n`;
          injectedUsage = true;
        }
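Finally, the streaming counterpart in one sketch. The chunk contents are invented; only the handling comes from the diff above.

// Hypothetical incoming SSE chunk missing OpenAI fields and carrying Azure filter metadata
const parsed = {
  id: "chatcmpl-123",
  choices: [{ index: 0, delta: { content: "Hi" }, content_filter_results: {} }],
  prompt_filter_results: []
};

// After the block above:
//   parsed.object  === "chat.completion.chunk"   (injected)
//   parsed.created === <unix seconds>            (injected)
//   both *_filter_results fields are removed     (stripped)
// Because fieldsInjected is true, the chunk is re-serialized and forwarded as:
//   `data: ${JSON.stringify(parsed)}\n`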