mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
Enhance image and embedding provider support
- Added new image models for GPT 5.2, 5.3, and 5.4, including capabilities for text-to-image and editing.
- Updated embedding handling to include optional dimensions in requests.
- Introduced support for custom embedding providers, allowing dynamic fetching and validation of custom nodes.
- Improved image generation handling with Codex integration, including progress tracking and error handling.
- Enhanced UI components to support adding custom embeddings and displaying their status.
This commit is contained in:
@@ -36,6 +36,10 @@ export const PROVIDER_MODELS = {
|
||||
{ id: "gpt-5.1", name: "GPT 5.1" },
|
||||
{ id: "gpt-5-codex", name: "GPT 5 Codex" },
|
||||
{ id: "gpt-5-codex-mini", name: "GPT 5 Codex Mini" },
|
||||
// Image models (uses image_generation tool, requires Plus/Pro plan)
|
||||
{ id: "gpt-5.4-image", name: "GPT 5.4 Image", type: "image", capabilities: ["text2img", "edit"] },
|
||||
{ id: "gpt-5.3-image", name: "GPT 5.3 Image", type: "image", capabilities: ["text2img", "edit"] },
|
||||
{ id: "gpt-5.2-image", name: "GPT 5.2 Image", type: "image", capabilities: ["text2img", "edit"] },
|
||||
],
|
||||
gc: [ // Gemini CLI
|
||||
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
|
||||
@@ -206,9 +210,9 @@ export const PROVIDER_MODELS = {
|
||||
{ id: "tts-1-hd", name: "TTS-1 HD", type: "tts" },
|
||||
{ id: "gpt-4o-mini-tts", name: "GPT-4o Mini TTS", type: "tts" },
|
||||
// Image models
|
||||
{ id: "gpt-image-1", name: "GPT Image 1", type: "image" },
|
||||
{ id: "dall-e-3", name: "DALL-E 3", type: "image" },
|
||||
{ id: "dall-e-2", name: "DALL-E 2", type: "image" },
|
||||
{ id: "gpt-image-1", name: "GPT Image 1", type: "image", params: ["n", "size", "quality", "response_format"] },
|
||||
{ id: "dall-e-3", name: "DALL-E 3", type: "image", params: ["size", "quality", "style", "response_format"] },
|
||||
{ id: "dall-e-2", name: "DALL-E 2", type: "image", params: ["n", "size", "response_format"] },
|
||||
],
|
||||
anthropic: [
|
||||
{ id: "claude-sonnet-4-20250514", name: "Claude Sonnet 4" },
|
||||
@@ -236,9 +240,9 @@ export const PROVIDER_MODELS = {
|
||||
{ id: "text-embedding-005", name: "Text Embedding 005", type: "embedding" },
|
||||
{ id: "text-embedding-004", name: "Text Embedding 004 (Legacy)", type: "embedding" },
|
||||
// Image models (Nano Banana)
|
||||
{ id: "gemini-3.1-flash-image-preview", name: "Gemini 3.1 Flash Image (Nano Banana 2)", type: "image" },
|
||||
{ id: "gemini-3-pro-image-preview", name: "Gemini 3 Pro Image (Nano Banana Pro)", type: "image" },
|
||||
{ id: "gemini-2.5-flash-image", name: "Gemini 2.5 Flash Image (Nano Banana)", type: "image" },
|
||||
{ id: "gemini-3.1-flash-image-preview", name: "Gemini 3.1 Flash Image (Nano Banana 2)", type: "image", params: [] },
|
||||
{ id: "gemini-3-pro-image-preview", name: "Gemini 3 Pro Image (Nano Banana Pro)", type: "image", params: [] },
|
||||
{ id: "gemini-2.5-flash-image", name: "Gemini 2.5 Flash Image (Nano Banana)", type: "image", params: [] },
|
||||
],
|
||||
openrouter: [
|
||||
// Embedding models
|
||||
@@ -254,10 +258,10 @@ export const PROVIDER_MODELS = {
|
||||
{ id: "openai/tts-1-hd", name: "TTS-1 HD", type: "tts" },
|
||||
{ id: "openai/tts-1", name: "TTS-1", type: "tts" },
|
||||
// Image models
|
||||
{ id: "openai/dall-e-3", name: "DALL-E 3 (via OpenRouter)", type: "image" },
|
||||
{ id: "openai/gpt-image-1", name: "GPT Image 1 (via OpenRouter)", type: "image" },
|
||||
{ id: "google/imagen-3.0-generate-002", name: "Imagen 3 (via OpenRouter)", type: "image" },
|
||||
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell (via OpenRouter)", type: "image" },
|
||||
{ id: "openai/dall-e-3", name: "DALL-E 3 (via OpenRouter)", type: "image", params: ["size", "quality", "style", "response_format"] },
|
||||
{ id: "openai/gpt-image-1", name: "GPT Image 1 (via OpenRouter)", type: "image", params: ["n", "size", "quality", "response_format"] },
|
||||
{ id: "google/imagen-3.0-generate-002", name: "Imagen 3 (via OpenRouter)", type: "image", params: ["n", "size"] },
|
||||
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell (via OpenRouter)", type: "image", params: ["n", "size"] },
|
||||
],
|
||||
glm: [
|
||||
{ id: "glm-5.1", name: "GLM 5.1" },
|
||||
@@ -282,7 +286,7 @@ export const PROVIDER_MODELS = {
|
||||
{ id: "MiniMax-M2.5", name: "MiniMax M2.5" },
|
||||
{ id: "MiniMax-M2.1", name: "MiniMax M2.1" },
|
||||
// Image models
|
||||
{ id: "minimax-image-01", name: "MiniMax Image 01", type: "image" },
|
||||
{ id: "minimax-image-01", name: "MiniMax Image 01", type: "image", params: ["n", "size", "response_format"] },
|
||||
],
|
||||
blackbox: [
|
||||
{ id: "gpt-4o", name: "GPT-4o" },
|
||||
@@ -468,20 +472,20 @@ export const PROVIDER_MODELS = {
|
||||
|
||||
// Image providers
|
||||
nanobanana: [
|
||||
{ id: "nanobanana-flash", name: "NanoBanana Flash", type: "image" },
|
||||
{ id: "nanobanana-pro", name: "NanoBanana Pro", type: "image" },
|
||||
{ id: "nanobanana-flash", name: "NanoBanana Flash", type: "image", params: ["n", "size"] },
|
||||
{ id: "nanobanana-pro", name: "NanoBanana Pro", type: "image", params: ["n", "size"] },
|
||||
],
|
||||
sdwebui: [
|
||||
{ id: "stable-diffusion-v1-5", name: "Stable Diffusion v1.5", type: "image" },
|
||||
{ id: "sdxl-base-1.0", name: "SDXL Base 1.0", type: "image" },
|
||||
{ id: "stable-diffusion-v1-5", name: "Stable Diffusion v1.5", type: "image", params: ["n", "size"] },
|
||||
{ id: "sdxl-base-1.0", name: "SDXL Base 1.0", type: "image", params: ["n", "size"] },
|
||||
],
|
||||
comfyui: [
|
||||
{ id: "flux-dev", name: "FLUX Dev", type: "image" },
|
||||
{ id: "sdxl", name: "SDXL", type: "image" },
|
||||
{ id: "flux-dev", name: "FLUX Dev", type: "image", params: ["n", "size"] },
|
||||
{ id: "sdxl", name: "SDXL", type: "image", params: ["n", "size"] },
|
||||
],
|
||||
huggingface: [
|
||||
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell", type: "image" },
|
||||
{ id: "stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL Base 1.0", type: "image" },
|
||||
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell", type: "image", params: [] },
|
||||
{ id: "stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL Base 1.0", type: "image", params: [] },
|
||||
],
|
||||
};
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ function isGeminiProvider(provider) {
|
||||
* - Single input → embedContent body: { model, content: { parts: [{ text }] } }
|
||||
* - Batch input → batchEmbedContents body: { requests: [{ model, content: { parts: [{ text }] } }] }
|
||||
*/
|
||||
function buildEmbeddingsBody(provider, model, input, encodingFormat) {
|
||||
function buildEmbeddingsBody(provider, model, input, encodingFormat, dimensions) {
|
||||
if (isGeminiProvider(provider)) {
|
||||
// Normalize model name: Gemini API expects "models/<model>" prefix
|
||||
const geminiModel = model.startsWith("models/") ? model : `models/${model}`;
|
||||
@@ -50,6 +50,10 @@ function buildEmbeddingsBody(provider, model, input, encodingFormat) {
|
||||
if (encodingFormat) {
|
||||
body.encoding_format = encodingFormat;
|
||||
}
|
||||
if (dimensions != null && dimensions !== "") {
|
||||
const dim = Number(dimensions);
|
||||
if (Number.isFinite(dim) && dim > 0) body.dimensions = dim;
|
||||
}
|
||||
return body;
|
||||
}
|
||||
|
||||
@@ -79,10 +83,12 @@ function buildEmbeddingsUrl(provider, model, credentials, input) {
|
||||
case "openrouter":
|
||||
return "https://openrouter.ai/api/v1/embeddings";
|
||||
default:
|
||||
// openai-compatible providers: use their baseUrl + /embeddings
|
||||
if (provider?.startsWith?.("openai-compatible-")) {
|
||||
const baseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
|
||||
return `${baseUrl.replace(/\/$/, "")}/embeddings`;
|
||||
// openai-compatible & custom-embedding providers: use their baseUrl + /embeddings
|
||||
if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) {
|
||||
const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
|
||||
// Defensive: strip trailing slash and accidental /embeddings to avoid double-append
|
||||
const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, "");
|
||||
return `${baseUrl}/embeddings`;
|
||||
}
|
||||
// For other providers, attempt to use their base URL pattern with /embeddings path
|
||||
return null;
|
||||
@@ -211,7 +217,7 @@ export async function handleEmbeddingsCore({
|
||||
}
|
||||
|
||||
const headers = buildEmbeddingsHeaders(provider, credentials);
|
||||
const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat);
|
||||
const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat, body.dimensions);
|
||||
|
||||
log?.debug?.("EMBEDDINGS", `${provider.toUpperCase()} | ${model} | input_type=${Array.isArray(input) ? `array[${input.length}]` : "string"}`);
|
||||
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { createErrorResult, parseUpstreamError, formatProviderError } from "../utils/error.js";
|
||||
import { HTTP_STATUS } from "../config/runtimeConfig.js";
|
||||
import { refreshWithRetry } from "../services/tokenRefresh.js";
|
||||
import { getExecutor } from "../executors/index.js";
|
||||
|
||||
const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses";
|
||||
const CODEX_USER_AGENT = "codex-imagen/0.2.6";
|
||||
const CODEX_VERSION = "0.122.0";
|
||||
const CODEX_ORIGINATOR = "codex_cli_rs";
|
||||
const CODEX_MODEL_SUFFIX = "-image";
|
||||
const CODEX_REF_DETAIL = "high";
|
||||
|
||||
// Image provider configurations
|
||||
const IMAGE_PROVIDERS = {
|
||||
openai: {
|
||||
@@ -37,8 +45,161 @@ const IMAGE_PROVIDERS = {
|
||||
baseUrl: "https://api-inference.huggingface.co/models",
|
||||
format: "huggingface",
|
||||
},
|
||||
codex: {
|
||||
baseUrl: CODEX_RESPONSES_URL,
|
||||
format: "codex",
|
||||
stream: true,
|
||||
},
|
||||
};
|
||||
|
||||
// Decode codex chatgpt account id from idToken if not stored
|
||||
// Recover the ChatGPT account id from a Codex OAuth id token (a JWT).
// The claim lives under the "https://api.openai.com/auth" namespace in the
// payload segment. Returns null for missing/malformed tokens — never throws.
function decodeCodexAccountId(idToken) {
  try {
    const segments = String(idToken || "").split(".");
    if (segments.length !== 3) return null;
    // JWT payloads are base64url: restore the standard alphabet and padding.
    let b64 = segments[1].replace(/-/g, "+").replace(/_/g, "/");
    while (b64.length % 4 !== 0) b64 += "=";
    const claims = JSON.parse(Buffer.from(b64, "base64").toString("utf8"));
    return claims?.["https://api.openai.com/auth"]?.chatgpt_account_id || null;
  } catch {
    return null;
  }
}
|
||||
|
||||
// Strip "-image" suffix to get the underlying chat model
|
||||
// Map a Codex image-model id back to its underlying chat model by removing
// the "-image" suffix; ids without the suffix pass through untouched.
function stripCodexImageModel(model) {
  if (!model.endsWith(CODEX_MODEL_SUFFIX)) return model;
  return model.slice(0, model.length - CODEX_MODEL_SUFFIX.length);
}
|
||||
|
||||
// Normalize a single ref image input to a data URL
|
||||
// Normalize a reference-image input into something Codex accepts: an
// http(s) or data URL is passed through unchanged; any other non-empty
// string is assumed to be raw base64 PNG and wrapped as a data URL.
// Non-string or empty inputs yield null.
function toCodexDataUrl(input) {
  if (typeof input !== "string" || input === "") return null;
  const alreadyUrl = /^data:image\//i.test(input) || /^https?:\/\//i.test(input);
  if (alreadyUrl) return input;
  // assume raw base64 PNG
  return `data:image/png;base64,${input}`;
}
|
||||
|
||||
// Build content array with optional reference images, mirroring codex-imagen tagging
|
||||
// Assemble the user-message content array: each reference image is wrapped
// in <image name=imageN> ... </image> text tags (mirroring codex-imagen
// tagging), and the prompt text always comes last.
function buildCodexContent(prompt, refs) {
  const taggedRefs = refs.flatMap((url, i) => [
    { type: "input_text", text: `<image name=image${i + 1}>` },
    { type: "input_image", image_url: url, detail: CODEX_REF_DETAIL },
    { type: "input_text", text: "</image>" },
  ]);
  return [...taggedRefs, { type: "input_text", text: prompt }];
}
|
||||
|
||||
// Parse Codex SSE stream, log progress, return final base64 image.
|
||||
// Optional callbacks let caller forward events to client (SSE pipe).
|
||||
// Parse the Codex SSE response stream, log stage transitions, and return the
// final base64-encoded image (or null if the stream ended without one).
//
// Optional callbacks let the caller forward events to a client (SSE pipe):
//   - onProgress({ stage, bytesReceived })  — throttled to at most ~5/s
//   - onPartialImage({ b64_json, index })   — incremental preview frames
//
// Fixes vs. the previous version: event separators are matched CRLF-tolerantly
// (a "\r\n\r\n" delimited stream previously never split into events), and
// multiple `data:` lines are joined with "\n" per the SSE spec instead of
// being concatenated (which corrupted multi-line JSON payloads).
async function parseCodexImageStream(response, log, callbacks = {}) {
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let imageB64 = null;
  let lastEvent = null;
  let bytesReceived = 0;
  let lastProgressLogMs = 0;

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    bytesReceived += value?.byteLength || 0;
    buffer += decoder.decode(value, { stream: true });

    // SSE events are separated by a blank line; tolerate CRLF line endings.
    let sep;
    while ((sep = buffer.match(/\r?\n\r?\n/)) !== null) {
      const block = buffer.slice(0, sep.index);
      buffer = buffer.slice(sep.index + sep[0].length);

      const lines = block.split(/\r?\n/);
      let eventName = null;
      let dataStr = "";
      for (const line of lines) {
        if (line.startsWith("event:")) {
          eventName = line.slice(6).trim();
        } else if (line.startsWith("data:")) {
          // Per the SSE spec, consecutive data lines are joined with "\n".
          const chunk = line.slice(5).trim();
          dataStr = dataStr ? `${dataStr}\n${chunk}` : chunk;
        }
      }
      if (!eventName) continue;
      if (eventName !== lastEvent) {
        log?.info?.("IMAGE", `codex progress: ${eventName}`);
        lastEvent = eventName;
      }

      // Notify caller about progress (throttled to ~5/s to avoid flooding)
      const now = Date.now();
      if (callbacks.onProgress && now - lastProgressLogMs > 200) {
        lastProgressLogMs = now;
        callbacks.onProgress({ stage: eventName, bytesReceived });
      }

      // Partial previews: forward incremental frames when the caller asked.
      if (eventName === "response.image_generation_call.partial_image" && dataStr) {
        try {
          const data = JSON.parse(dataStr);
          if (callbacks.onPartialImage && data?.partial_image_b64) {
            callbacks.onPartialImage({ b64_json: data.partial_image_b64, index: data.partial_image_index });
          }
        } catch {}
      }

      // Final image arrives as a completed image_generation_call output item.
      if (eventName === "response.output_item.done" && dataStr) {
        try {
          const data = JSON.parse(dataStr);
          const item = data?.item;
          if (item?.type === "image_generation_call" && item.result) {
            imageB64 = item.result;
          }
        } catch {}
      }
    }
  }
  return imageB64;
}
|
||||
|
||||
// Build SSE Response that pipes codex progress + partial + done events to client
|
||||
// Wrap a Codex provider response in an SSE Response that relays progress,
// partial_image, and final done/error events to the client. `onSuccess` (if
// given) is awaited once a final image is produced, before the done event.
function buildCodexSseResponse(providerResponse, log, onSuccess) {
  const body = new ReadableStream({
    async start(controller) {
      const encoder = new TextEncoder();
      const emit = (event, payload) => {
        controller.enqueue(encoder.encode(`event: ${event}\ndata: ${JSON.stringify(payload)}\n\n`));
      };
      try {
        const imageB64 = await parseCodexImageStream(providerResponse, log, {
          onProgress: (info) => emit("progress", info),
          onPartialImage: (info) => emit("partial_image", info),
        });
        if (imageB64) {
          if (onSuccess) await onSuccess();
          emit("done", {
            created: Math.floor(Date.now() / 1000),
            data: [{ b64_json: imageB64 }],
          });
        } else {
          emit("error", { message: "Codex did not return an image. Account may not be entitled (Plus/Pro required)." });
        }
      } catch (err) {
        emit("error", { message: err?.message || "Stream failed" });
      } finally {
        controller.close();
      }
    },
  });
  return new Response(body, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
      "Access-Control-Allow-Origin": "*",
    },
  });
}
|
||||
|
||||
/**
|
||||
* Build image generation URL
|
||||
*/
|
||||
@@ -54,6 +215,8 @@ function buildImageUrl(provider, model, credentials) {
|
||||
}
|
||||
case "huggingface":
|
||||
return `${config.baseUrl}/${model}`;
|
||||
case "codex":
|
||||
return CODEX_RESPONSES_URL;
|
||||
default:
|
||||
return config.baseUrl;
|
||||
}
|
||||
@@ -69,6 +232,23 @@ function buildImageHeaders(provider, credentials) {
|
||||
return headers;
|
||||
}
|
||||
|
||||
if (provider === "codex") {
|
||||
const accountId =
|
||||
credentials?.providerSpecificData?.chatgptAccountId ||
|
||||
decodeCodexAccountId(credentials?.idToken);
|
||||
return {
|
||||
"accept": "text/event-stream, application/json",
|
||||
"authorization": `Bearer ${credentials?.accessToken || ""}`,
|
||||
"chatgpt-account-id": accountId || "",
|
||||
"content-type": "application/json",
|
||||
"originator": CODEX_ORIGINATOR,
|
||||
"session_id": randomUUID(),
|
||||
"user-agent": CODEX_USER_AGENT,
|
||||
"version": CODEX_VERSION,
|
||||
"x-client-request-id": randomUUID(),
|
||||
};
|
||||
}
|
||||
|
||||
if (provider === "openrouter") {
|
||||
headers["Authorization"] = `Bearer ${credentials?.apiKey || credentials?.accessToken}`;
|
||||
headers["HTTP-Referer"] = "https://endpoint-proxy.local";
|
||||
@@ -92,9 +272,28 @@ function buildImageHeaders(provider, credentials) {
|
||||
* Build request body based on provider format
|
||||
*/
|
||||
function buildImageBody(provider, model, body) {
|
||||
const { prompt, n = 1, size = "1024x1024", quality, style, response_format } = body;
|
||||
const { prompt, n = 1, size = "1024x1024", quality, style, response_format, image, images } = body;
|
||||
|
||||
switch (provider) {
|
||||
case "codex": {
|
||||
const refs = [];
|
||||
if (Array.isArray(images)) images.forEach((i) => { const u = toCodexDataUrl(i); if (u) refs.push(u); });
|
||||
const single = toCodexDataUrl(image);
|
||||
if (single) refs.push(single);
|
||||
return {
|
||||
model: stripCodexImageModel(model),
|
||||
instructions: "",
|
||||
input: [{ type: "message", role: "user", content: buildCodexContent(prompt, refs) }],
|
||||
tools: [{ type: "image_generation", output_format: "png" }],
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: false,
|
||||
prompt_cache_key: randomUUID(),
|
||||
stream: true,
|
||||
store: false,
|
||||
reasoning: null,
|
||||
};
|
||||
}
|
||||
|
||||
case "gemini":
|
||||
return {
|
||||
contents: [{ parts: [{ text: prompt }] }],
|
||||
@@ -204,6 +403,7 @@ export async function handleImageGenerationCore({
|
||||
modelInfo,
|
||||
credentials,
|
||||
log,
|
||||
streamToClient = false,
|
||||
onCredentialsRefreshed,
|
||||
onRequestSuccess,
|
||||
}) {
|
||||
@@ -285,7 +485,6 @@ export async function handleImageGenerationCore({
|
||||
|
||||
let responseBody;
|
||||
try {
|
||||
// HuggingFace returns binary image data
|
||||
if (provider === "huggingface") {
|
||||
const buffer = await providerResponse.arrayBuffer();
|
||||
const base64 = Buffer.from(buffer).toString("base64");
|
||||
@@ -293,6 +492,25 @@ export async function handleImageGenerationCore({
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
data: [{ b64_json: base64 }],
|
||||
};
|
||||
} else if (provider === "codex") {
|
||||
// SSE pipe to client (progress + partial_image + done)
|
||||
if (streamToClient) {
|
||||
return {
|
||||
success: true,
|
||||
response: buildCodexSseResponse(providerResponse, log, onRequestSuccess),
|
||||
};
|
||||
}
|
||||
const b64 = await parseCodexImageStream(providerResponse, log);
|
||||
if (!b64) {
|
||||
return createErrorResult(
|
||||
HTTP_STATUS.BAD_GATEWAY,
|
||||
"Codex did not return an image. Account may not be entitled (Plus/Pro required)."
|
||||
);
|
||||
}
|
||||
responseBody = {
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
data: [{ b64_json: b64 }],
|
||||
};
|
||||
} else {
|
||||
responseBody = await providerResponse.json();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user