Enhance image and embedding provider support

- Added new image models for GPT 5.2, 5.3, and 5.4, including capabilities for text-to-image and editing.
- Updated embedding handling to include optional dimensions in requests.
- Introduced support for custom embedding providers, allowing dynamic fetching and validation of custom nodes.
- Improved image generation handling with Codex integration, including progress tracking and error handling.
- Enhanced UI components to support adding custom embeddings and displaying their status.
This commit is contained in:
decolua
2026-04-25 16:22:30 +07:00
parent cca615eaff
commit 0b8bed5793
19 changed files with 1039 additions and 130 deletions

View File

@@ -36,6 +36,10 @@ export const PROVIDER_MODELS = {
{ id: "gpt-5.1", name: "GPT 5.1" },
{ id: "gpt-5-codex", name: "GPT 5 Codex" },
{ id: "gpt-5-codex-mini", name: "GPT 5 Codex Mini" },
// Image models (uses image_generation tool, requires Plus/Pro plan)
{ id: "gpt-5.4-image", name: "GPT 5.4 Image", type: "image", capabilities: ["text2img", "edit"] },
{ id: "gpt-5.3-image", name: "GPT 5.3 Image", type: "image", capabilities: ["text2img", "edit"] },
{ id: "gpt-5.2-image", name: "GPT 5.2 Image", type: "image", capabilities: ["text2img", "edit"] },
],
gc: [ // Gemini CLI
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
@@ -206,9 +210,9 @@ export const PROVIDER_MODELS = {
{ id: "tts-1-hd", name: "TTS-1 HD", type: "tts" },
{ id: "gpt-4o-mini-tts", name: "GPT-4o Mini TTS", type: "tts" },
// Image models
{ id: "gpt-image-1", name: "GPT Image 1", type: "image" },
{ id: "dall-e-3", name: "DALL-E 3", type: "image" },
{ id: "dall-e-2", name: "DALL-E 2", type: "image" },
{ id: "gpt-image-1", name: "GPT Image 1", type: "image", params: ["n", "size", "quality", "response_format"] },
{ id: "dall-e-3", name: "DALL-E 3", type: "image", params: ["size", "quality", "style", "response_format"] },
{ id: "dall-e-2", name: "DALL-E 2", type: "image", params: ["n", "size", "response_format"] },
],
anthropic: [
{ id: "claude-sonnet-4-20250514", name: "Claude Sonnet 4" },
@@ -236,9 +240,9 @@ export const PROVIDER_MODELS = {
{ id: "text-embedding-005", name: "Text Embedding 005", type: "embedding" },
{ id: "text-embedding-004", name: "Text Embedding 004 (Legacy)", type: "embedding" },
// Image models (Nano Banana)
{ id: "gemini-3.1-flash-image-preview", name: "Gemini 3.1 Flash Image (Nano Banana 2)", type: "image" },
{ id: "gemini-3-pro-image-preview", name: "Gemini 3 Pro Image (Nano Banana Pro)", type: "image" },
{ id: "gemini-2.5-flash-image", name: "Gemini 2.5 Flash Image (Nano Banana)", type: "image" },
{ id: "gemini-3.1-flash-image-preview", name: "Gemini 3.1 Flash Image (Nano Banana 2)", type: "image", params: [] },
{ id: "gemini-3-pro-image-preview", name: "Gemini 3 Pro Image (Nano Banana Pro)", type: "image", params: [] },
{ id: "gemini-2.5-flash-image", name: "Gemini 2.5 Flash Image (Nano Banana)", type: "image", params: [] },
],
openrouter: [
// Embedding models
@@ -254,10 +258,10 @@ export const PROVIDER_MODELS = {
{ id: "openai/tts-1-hd", name: "TTS-1 HD", type: "tts" },
{ id: "openai/tts-1", name: "TTS-1", type: "tts" },
// Image models
{ id: "openai/dall-e-3", name: "DALL-E 3 (via OpenRouter)", type: "image" },
{ id: "openai/gpt-image-1", name: "GPT Image 1 (via OpenRouter)", type: "image" },
{ id: "google/imagen-3.0-generate-002", name: "Imagen 3 (via OpenRouter)", type: "image" },
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell (via OpenRouter)", type: "image" },
{ id: "openai/dall-e-3", name: "DALL-E 3 (via OpenRouter)", type: "image", params: ["size", "quality", "style", "response_format"] },
{ id: "openai/gpt-image-1", name: "GPT Image 1 (via OpenRouter)", type: "image", params: ["n", "size", "quality", "response_format"] },
{ id: "google/imagen-3.0-generate-002", name: "Imagen 3 (via OpenRouter)", type: "image", params: ["n", "size"] },
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell (via OpenRouter)", type: "image", params: ["n", "size"] },
],
glm: [
{ id: "glm-5.1", name: "GLM 5.1" },
@@ -282,7 +286,7 @@ export const PROVIDER_MODELS = {
{ id: "MiniMax-M2.5", name: "MiniMax M2.5" },
{ id: "MiniMax-M2.1", name: "MiniMax M2.1" },
// Image models
{ id: "minimax-image-01", name: "MiniMax Image 01", type: "image" },
{ id: "minimax-image-01", name: "MiniMax Image 01", type: "image", params: ["n", "size", "response_format"] },
],
blackbox: [
{ id: "gpt-4o", name: "GPT-4o" },
@@ -468,20 +472,20 @@ export const PROVIDER_MODELS = {
// Image providers
nanobanana: [
{ id: "nanobanana-flash", name: "NanoBanana Flash", type: "image" },
{ id: "nanobanana-pro", name: "NanoBanana Pro", type: "image" },
{ id: "nanobanana-flash", name: "NanoBanana Flash", type: "image", params: ["n", "size"] },
{ id: "nanobanana-pro", name: "NanoBanana Pro", type: "image", params: ["n", "size"] },
],
sdwebui: [
{ id: "stable-diffusion-v1-5", name: "Stable Diffusion v1.5", type: "image" },
{ id: "sdxl-base-1.0", name: "SDXL Base 1.0", type: "image" },
{ id: "stable-diffusion-v1-5", name: "Stable Diffusion v1.5", type: "image", params: ["n", "size"] },
{ id: "sdxl-base-1.0", name: "SDXL Base 1.0", type: "image", params: ["n", "size"] },
],
comfyui: [
{ id: "flux-dev", name: "FLUX Dev", type: "image" },
{ id: "sdxl", name: "SDXL", type: "image" },
{ id: "flux-dev", name: "FLUX Dev", type: "image", params: ["n", "size"] },
{ id: "sdxl", name: "SDXL", type: "image", params: ["n", "size"] },
],
huggingface: [
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell", type: "image" },
{ id: "stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL Base 1.0", type: "image" },
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell", type: "image", params: [] },
{ id: "stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL Base 1.0", type: "image", params: [] },
],
};

View File

@@ -23,7 +23,7 @@ function isGeminiProvider(provider) {
* - Single input → embedContent body: { model, content: { parts: [{ text }] } }
* - Batch input → batchEmbedContents body: { requests: [{ model, content: { parts: [{ text }] } }] }
*/
function buildEmbeddingsBody(provider, model, input, encodingFormat) {
function buildEmbeddingsBody(provider, model, input, encodingFormat, dimensions) {
if (isGeminiProvider(provider)) {
// Normalize model name: Gemini API expects "models/<model>" prefix
const geminiModel = model.startsWith("models/") ? model : `models/${model}`;
@@ -50,6 +50,10 @@ function buildEmbeddingsBody(provider, model, input, encodingFormat) {
if (encodingFormat) {
body.encoding_format = encodingFormat;
}
if (dimensions != null && dimensions !== "") {
const dim = Number(dimensions);
if (Number.isFinite(dim) && dim > 0) body.dimensions = dim;
}
return body;
}
@@ -79,10 +83,12 @@ function buildEmbeddingsUrl(provider, model, credentials, input) {
case "openrouter":
return "https://openrouter.ai/api/v1/embeddings";
default:
// openai-compatible providers: use their baseUrl + /embeddings
if (provider?.startsWith?.("openai-compatible-")) {
const baseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
return `${baseUrl.replace(/\/$/, "")}/embeddings`;
// openai-compatible & custom-embedding providers: use their baseUrl + /embeddings
if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) {
const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
// Defensive: strip trailing slash and accidental /embeddings to avoid double-append
const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, "");
return `${baseUrl}/embeddings`;
}
// For other providers, attempt to use their base URL pattern with /embeddings path
return null;
@@ -211,7 +217,7 @@ export async function handleEmbeddingsCore({
}
const headers = buildEmbeddingsHeaders(provider, credentials);
const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat);
const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat, body.dimensions);
log?.debug?.("EMBEDDINGS", `${provider.toUpperCase()} | ${model} | input_type=${Array.isArray(input) ? `array[${input.length}]` : "string"}`);

View File

@@ -1,8 +1,16 @@
import { randomUUID } from "node:crypto";
import { createErrorResult, parseUpstreamError, formatProviderError } from "../utils/error.js";
import { HTTP_STATUS } from "../config/runtimeConfig.js";
import { refreshWithRetry } from "../services/tokenRefresh.js";
import { getExecutor } from "../executors/index.js";
// Codex (ChatGPT backend) image generation — request constants.
const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses";
// Client identity values sent as headers to the Codex backend (see buildImageHeaders).
const CODEX_USER_AGENT = "codex-imagen/0.2.6";
const CODEX_VERSION = "0.122.0";
const CODEX_ORIGINATOR = "codex_cli_rs";
// Model ids ending in this suffix are Codex image models; the suffix is stripped
// before the request is built (see stripCodexImageModel).
const CODEX_MODEL_SUFFIX = "-image";
// `detail` level attached to each reference image in buildCodexContent.
const CODEX_REF_DETAIL = "high";
// Image provider configurations
const IMAGE_PROVIDERS = {
openai: {
@@ -37,8 +45,161 @@ const IMAGE_PROVIDERS = {
baseUrl: "https://api-inference.huggingface.co/models",
format: "huggingface",
},
codex: {
baseUrl: CODEX_RESPONSES_URL,
format: "codex",
stream: true,
},
};
// Decode codex chatgpt account id from idToken if not stored.
// Reads the JWT payload claim `"https://api.openai.com/auth".chatgpt_account_id`;
// returns null for anything that is not a well-formed three-segment JWT.
function decodeCodexAccountId(idToken) {
  const segments = String(idToken || "").split(".");
  if (segments.length !== 3) return null;
  try {
    // base64url -> base64, then restore the stripped "=" padding
    const b64 = segments[1].replace(/-/g, "+").replace(/_/g, "/");
    const padded = b64.padEnd(b64.length + ((4 - (b64.length % 4)) % 4), "=");
    const claims = JSON.parse(Buffer.from(padded, "base64").toString("utf8"));
    return claims?.["https://api.openai.com/auth"]?.chatgpt_account_id || null;
  } catch {
    return null;
  }
}
// Strip "-image" suffix to get the underlying chat model
// (e.g. "gpt-5.2-image" -> "gpt-5.2"); other ids pass through untouched.
function stripCodexImageModel(model) {
  if (!model.endsWith(CODEX_MODEL_SUFFIX)) return model;
  return model.slice(0, model.length - CODEX_MODEL_SUFFIX.length);
}
// Normalize a single ref image input to a data URL.
// Existing data: URLs and http(s) URLs pass through unchanged; any other
// non-empty string is assumed to be raw base64 PNG and wrapped accordingly.
// Non-strings and empty strings yield null.
function toCodexDataUrl(input) {
  if (typeof input !== "string" || input === "") return null;
  if (/^(?:data:image\/|https?:\/\/)/i.test(input)) return input;
  return `data:image/png;base64,${input}`;
}
// Build content array with optional reference images, mirroring codex-imagen tagging.
// Each ref becomes a `<image name=imageN>` ... `</image>` text/image triplet,
// followed by the user prompt as the final input_text entry.
function buildCodexContent(prompt, refs) {
  const tagged = refs.flatMap((url, i) => [
    { type: "input_text", text: `<image name=image${i + 1}>` },
    { type: "input_image", image_url: url, detail: CODEX_REF_DETAIL },
    { type: "input_text", text: "</image>" },
  ]);
  return [...tagged, { type: "input_text", text: prompt }];
}
// Parse Codex SSE stream, log progress, return final base64 image.
// Optional callbacks let caller forward events to client (SSE pipe):
// - callbacks.onProgress({ stage, bytesReceived }) — at most one call per 200ms.
// - callbacks.onPartialImage({ b64_json, index }) — for each partial_image event.
// Returns the base64 string from the last completed image_generation_call item,
// or null if the stream ended without one.
async function parseCodexImageStream(response, log, callbacks = {}) {
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";                // unconsumed SSE text carried across chunks
  let imageB64 = null;            // last completed image result seen so far
  let lastEvent = null;           // dedupe: log each stage transition only once
  let bytesReceived = 0;
  let lastProgressLogMs = 0;      // timestamp of last onProgress call (throttle)
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    bytesReceived += value?.byteLength || 0;
    // stream:true keeps multi-byte UTF-8 sequences split across chunks intact
    buffer += decoder.decode(value, { stream: true });
    // SSE events separated by blank line
    let sepIdx;
    while ((sepIdx = buffer.indexOf("\n\n")) !== -1) {
      const block = buffer.slice(0, sepIdx);
      buffer = buffer.slice(sepIdx + 2);
      const lines = block.split("\n");
      let eventName = null;
      let dataStr = "";
      for (const line of lines) {
        if (line.startsWith("event:")) eventName = line.slice(6).trim();
        // NOTE(review): multi-line `data:` fields are concatenated without the
        // SSE spec's "\n" joiner; fine for single-line JSON payloads — confirm
        // Codex never splits a data field across lines.
        else if (line.startsWith("data:")) dataStr += line.slice(5).trim();
      }
      if (!eventName) continue;
      if (eventName !== lastEvent) {
        log?.info?.("IMAGE", `codex progress: ${eventName}`);
        lastEvent = eventName;
      }
      // Notify caller about progress (throttled to ~5/s to avoid flooding)
      const now = Date.now();
      if (callbacks.onProgress && now - lastProgressLogMs > 200) {
        lastProgressLogMs = now;
        callbacks.onProgress({ stage: eventName, bytesReceived });
      }
      if (eventName === "response.image_generation_call.partial_image" && dataStr) {
        try {
          const data = JSON.parse(dataStr);
          if (callbacks.onPartialImage && data?.partial_image_b64) {
            callbacks.onPartialImage({ b64_json: data.partial_image_b64, index: data.partial_image_index });
          }
        } catch {} // tolerate malformed partial payloads; final image may still arrive
      }
      if (eventName === "response.output_item.done" && dataStr) {
        try {
          const data = JSON.parse(dataStr);
          const item = data?.item;
          if (item?.type === "image_generation_call" && item.result) {
            imageB64 = item.result;
          }
        } catch {} // ignore unparsable output items
      }
    }
  }
  return imageB64;
}
// Build SSE Response that pipes codex progress + partial + done events to client.
// Events emitted on the outgoing stream (see `send` calls below):
// - "progress":      { stage, bytesReceived }
// - "partial_image": { b64_json, index }
// - "done":          { created, data: [{ b64_json }] } — onSuccess runs first
// - "error":         { message }
// The stream is always closed, even when parsing fails.
function buildCodexSseResponse(providerResponse, log, onSuccess) {
  const stream = new ReadableStream({
    async start(controller) {
      const enc = new TextEncoder();
      // Serialize one SSE frame: "event: <name>\ndata: <json>\n\n"
      const send = (event, data) => {
        controller.enqueue(enc.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`));
      };
      try {
        const b64 = await parseCodexImageStream(providerResponse, log, {
          onProgress: (info) => send("progress", info),
          onPartialImage: (info) => send("partial_image", info),
        });
        if (!b64) {
          send("error", { message: "Codex did not return an image. Account may not be entitled (Plus/Pro required)." });
        } else {
          // Run the caller's success hook before signalling completion.
          if (onSuccess) await onSuccess();
          send("done", {
            created: Math.floor(Date.now() / 1000),
            data: [{ b64_json: b64 }],
          });
        }
      } catch (err) {
        send("error", { message: err?.message || "Stream failed" });
      } finally {
        controller.close();
      }
    },
  });
  return new Response(stream, {
    headers: {
      // X-Accel-Buffering: no — ask reverse proxies not to buffer the SSE stream
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
      "Access-Control-Allow-Origin": "*",
    },
  });
}
/**
* Build image generation URL
*/
@@ -54,6 +215,8 @@ function buildImageUrl(provider, model, credentials) {
}
case "huggingface":
return `${config.baseUrl}/${model}`;
case "codex":
return CODEX_RESPONSES_URL;
default:
return config.baseUrl;
}
@@ -69,6 +232,23 @@ function buildImageHeaders(provider, credentials) {
return headers;
}
if (provider === "codex") {
const accountId =
credentials?.providerSpecificData?.chatgptAccountId ||
decodeCodexAccountId(credentials?.idToken);
return {
"accept": "text/event-stream, application/json",
"authorization": `Bearer ${credentials?.accessToken || ""}`,
"chatgpt-account-id": accountId || "",
"content-type": "application/json",
"originator": CODEX_ORIGINATOR,
"session_id": randomUUID(),
"user-agent": CODEX_USER_AGENT,
"version": CODEX_VERSION,
"x-client-request-id": randomUUID(),
};
}
if (provider === "openrouter") {
headers["Authorization"] = `Bearer ${credentials?.apiKey || credentials?.accessToken}`;
headers["HTTP-Referer"] = "https://endpoint-proxy.local";
@@ -92,9 +272,28 @@ function buildImageHeaders(provider, credentials) {
* Build request body based on provider format
*/
function buildImageBody(provider, model, body) {
const { prompt, n = 1, size = "1024x1024", quality, style, response_format } = body;
const { prompt, n = 1, size = "1024x1024", quality, style, response_format, image, images } = body;
switch (provider) {
case "codex": {
const refs = [];
if (Array.isArray(images)) images.forEach((i) => { const u = toCodexDataUrl(i); if (u) refs.push(u); });
const single = toCodexDataUrl(image);
if (single) refs.push(single);
return {
model: stripCodexImageModel(model),
instructions: "",
input: [{ type: "message", role: "user", content: buildCodexContent(prompt, refs) }],
tools: [{ type: "image_generation", output_format: "png" }],
tool_choice: "auto",
parallel_tool_calls: false,
prompt_cache_key: randomUUID(),
stream: true,
store: false,
reasoning: null,
};
}
case "gemini":
return {
contents: [{ parts: [{ text: prompt }] }],
@@ -204,6 +403,7 @@ export async function handleImageGenerationCore({
modelInfo,
credentials,
log,
streamToClient = false,
onCredentialsRefreshed,
onRequestSuccess,
}) {
@@ -285,7 +485,6 @@ export async function handleImageGenerationCore({
let responseBody;
try {
// HuggingFace returns binary image data
if (provider === "huggingface") {
const buffer = await providerResponse.arrayBuffer();
const base64 = Buffer.from(buffer).toString("base64");
@@ -293,6 +492,25 @@ export async function handleImageGenerationCore({
created: Math.floor(Date.now() / 1000),
data: [{ b64_json: base64 }],
};
} else if (provider === "codex") {
// SSE pipe to client (progress + partial_image + done)
if (streamToClient) {
return {
success: true,
response: buildCodexSseResponse(providerResponse, log, onRequestSuccess),
};
}
const b64 = await parseCodexImageStream(providerResponse, log);
if (!b64) {
return createErrorResult(
HTTP_STATUS.BAD_GATEWAY,
"Codex did not return an image. Account may not be entitled (Plus/Pro required)."
);
}
responseBody = {
created: Math.floor(Date.now() / 1000),
data: [{ b64_json: b64 }],
};
} else {
responseBody = await providerResponse.json();
}