Enhance image and embedding provider support

- Added new image models for GPT 5.2, 5.3, and 5.4, including capabilities for text-to-image and editing.
- Updated embedding handling to include optional dimensions in requests.
- Introduced support for custom embedding providers, allowing dynamic fetching and validation of custom nodes.
- Improved image generation handling with Codex integration, including progress tracking and error handling.
- Enhanced UI components to support adding custom embeddings and displaying their status.
This commit is contained in:
decolua
2026-04-25 16:22:30 +07:00
parent cca615eaff
commit 0b8bed5793
19 changed files with 1039 additions and 130 deletions

View File

@@ -36,6 +36,10 @@ export const PROVIDER_MODELS = {
{ id: "gpt-5.1", name: "GPT 5.1" },
{ id: "gpt-5-codex", name: "GPT 5 Codex" },
{ id: "gpt-5-codex-mini", name: "GPT 5 Codex Mini" },
// Image models (uses image_generation tool, requires Plus/Pro plan)
{ id: "gpt-5.4-image", name: "GPT 5.4 Image", type: "image", capabilities: ["text2img", "edit"] },
{ id: "gpt-5.3-image", name: "GPT 5.3 Image", type: "image", capabilities: ["text2img", "edit"] },
{ id: "gpt-5.2-image", name: "GPT 5.2 Image", type: "image", capabilities: ["text2img", "edit"] },
],
gc: [ // Gemini CLI
{ id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
@@ -206,9 +210,9 @@ export const PROVIDER_MODELS = {
{ id: "tts-1-hd", name: "TTS-1 HD", type: "tts" },
{ id: "gpt-4o-mini-tts", name: "GPT-4o Mini TTS", type: "tts" },
// Image models
{ id: "gpt-image-1", name: "GPT Image 1", type: "image" },
{ id: "dall-e-3", name: "DALL-E 3", type: "image" },
{ id: "dall-e-2", name: "DALL-E 2", type: "image" },
{ id: "gpt-image-1", name: "GPT Image 1", type: "image", params: ["n", "size", "quality", "response_format"] },
{ id: "dall-e-3", name: "DALL-E 3", type: "image", params: ["size", "quality", "style", "response_format"] },
{ id: "dall-e-2", name: "DALL-E 2", type: "image", params: ["n", "size", "response_format"] },
],
anthropic: [
{ id: "claude-sonnet-4-20250514", name: "Claude Sonnet 4" },
@@ -236,9 +240,9 @@ export const PROVIDER_MODELS = {
{ id: "text-embedding-005", name: "Text Embedding 005", type: "embedding" },
{ id: "text-embedding-004", name: "Text Embedding 004 (Legacy)", type: "embedding" },
// Image models (Nano Banana)
{ id: "gemini-3.1-flash-image-preview", name: "Gemini 3.1 Flash Image (Nano Banana 2)", type: "image" },
{ id: "gemini-3-pro-image-preview", name: "Gemini 3 Pro Image (Nano Banana Pro)", type: "image" },
{ id: "gemini-2.5-flash-image", name: "Gemini 2.5 Flash Image (Nano Banana)", type: "image" },
{ id: "gemini-3.1-flash-image-preview", name: "Gemini 3.1 Flash Image (Nano Banana 2)", type: "image", params: [] },
{ id: "gemini-3-pro-image-preview", name: "Gemini 3 Pro Image (Nano Banana Pro)", type: "image", params: [] },
{ id: "gemini-2.5-flash-image", name: "Gemini 2.5 Flash Image (Nano Banana)", type: "image", params: [] },
],
openrouter: [
// Embedding models
@@ -254,10 +258,10 @@ export const PROVIDER_MODELS = {
{ id: "openai/tts-1-hd", name: "TTS-1 HD", type: "tts" },
{ id: "openai/tts-1", name: "TTS-1", type: "tts" },
// Image models
{ id: "openai/dall-e-3", name: "DALL-E 3 (via OpenRouter)", type: "image" },
{ id: "openai/gpt-image-1", name: "GPT Image 1 (via OpenRouter)", type: "image" },
{ id: "google/imagen-3.0-generate-002", name: "Imagen 3 (via OpenRouter)", type: "image" },
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell (via OpenRouter)", type: "image" },
{ id: "openai/dall-e-3", name: "DALL-E 3 (via OpenRouter)", type: "image", params: ["size", "quality", "style", "response_format"] },
{ id: "openai/gpt-image-1", name: "GPT Image 1 (via OpenRouter)", type: "image", params: ["n", "size", "quality", "response_format"] },
{ id: "google/imagen-3.0-generate-002", name: "Imagen 3 (via OpenRouter)", type: "image", params: ["n", "size"] },
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell (via OpenRouter)", type: "image", params: ["n", "size"] },
],
glm: [
{ id: "glm-5.1", name: "GLM 5.1" },
@@ -282,7 +286,7 @@ export const PROVIDER_MODELS = {
{ id: "MiniMax-M2.5", name: "MiniMax M2.5" },
{ id: "MiniMax-M2.1", name: "MiniMax M2.1" },
// Image models
{ id: "minimax-image-01", name: "MiniMax Image 01", type: "image" },
{ id: "minimax-image-01", name: "MiniMax Image 01", type: "image", params: ["n", "size", "response_format"] },
],
blackbox: [
{ id: "gpt-4o", name: "GPT-4o" },
@@ -468,20 +472,20 @@ export const PROVIDER_MODELS = {
// Image providers
nanobanana: [
{ id: "nanobanana-flash", name: "NanoBanana Flash", type: "image" },
{ id: "nanobanana-pro", name: "NanoBanana Pro", type: "image" },
{ id: "nanobanana-flash", name: "NanoBanana Flash", type: "image", params: ["n", "size"] },
{ id: "nanobanana-pro", name: "NanoBanana Pro", type: "image", params: ["n", "size"] },
],
sdwebui: [
{ id: "stable-diffusion-v1-5", name: "Stable Diffusion v1.5", type: "image" },
{ id: "sdxl-base-1.0", name: "SDXL Base 1.0", type: "image" },
{ id: "stable-diffusion-v1-5", name: "Stable Diffusion v1.5", type: "image", params: ["n", "size"] },
{ id: "sdxl-base-1.0", name: "SDXL Base 1.0", type: "image", params: ["n", "size"] },
],
comfyui: [
{ id: "flux-dev", name: "FLUX Dev", type: "image" },
{ id: "sdxl", name: "SDXL", type: "image" },
{ id: "flux-dev", name: "FLUX Dev", type: "image", params: ["n", "size"] },
{ id: "sdxl", name: "SDXL", type: "image", params: ["n", "size"] },
],
huggingface: [
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell", type: "image" },
{ id: "stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL Base 1.0", type: "image" },
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell", type: "image", params: [] },
{ id: "stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL Base 1.0", type: "image", params: [] },
],
};

View File

@@ -23,7 +23,7 @@ function isGeminiProvider(provider) {
* - Single input → embedContent body: { model, content: { parts: [{ text }] } }
* - Batch input → batchEmbedContents body: { requests: [{ model, content: { parts: [{ text }] } }] }
*/
function buildEmbeddingsBody(provider, model, input, encodingFormat) {
function buildEmbeddingsBody(provider, model, input, encodingFormat, dimensions) {
if (isGeminiProvider(provider)) {
// Normalize model name: Gemini API expects "models/<model>" prefix
const geminiModel = model.startsWith("models/") ? model : `models/${model}`;
@@ -50,6 +50,10 @@ function buildEmbeddingsBody(provider, model, input, encodingFormat) {
if (encodingFormat) {
body.encoding_format = encodingFormat;
}
if (dimensions != null && dimensions !== "") {
const dim = Number(dimensions);
if (Number.isFinite(dim) && dim > 0) body.dimensions = dim;
}
return body;
}
@@ -79,10 +83,12 @@ function buildEmbeddingsUrl(provider, model, credentials, input) {
case "openrouter":
return "https://openrouter.ai/api/v1/embeddings";
default:
// openai-compatible providers: use their baseUrl + /embeddings
if (provider?.startsWith?.("openai-compatible-")) {
const baseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
return `${baseUrl.replace(/\/$/, "")}/embeddings`;
// openai-compatible & custom-embedding providers: use their baseUrl + /embeddings
if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) {
const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
// Defensive: strip trailing slash and accidental /embeddings to avoid double-append
const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, "");
return `${baseUrl}/embeddings`;
}
// For other providers, attempt to use their base URL pattern with /embeddings path
return null;
@@ -211,7 +217,7 @@ export async function handleEmbeddingsCore({
}
const headers = buildEmbeddingsHeaders(provider, credentials);
const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat);
const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat, body.dimensions);
log?.debug?.("EMBEDDINGS", `${provider.toUpperCase()} | ${model} | input_type=${Array.isArray(input) ? `array[${input.length}]` : "string"}`);

View File

@@ -1,8 +1,16 @@
import { randomUUID } from "node:crypto";
import { createErrorResult, parseUpstreamError, formatProviderError } from "../utils/error.js";
import { HTTP_STATUS } from "../config/runtimeConfig.js";
import { refreshWithRetry } from "../services/tokenRefresh.js";
import { getExecutor } from "../executors/index.js";
// Codex (ChatGPT backend) image generation — request constants.
const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses";
// Client identity values sent as headers to the Codex backend (see buildImageHeaders).
const CODEX_USER_AGENT = "codex-imagen/0.2.6";
const CODEX_VERSION = "0.122.0";
const CODEX_ORIGINATOR = "codex_cli_rs";
// Model ids ending in this suffix are Codex image models; the suffix is stripped
// before the request is built (see stripCodexImageModel).
const CODEX_MODEL_SUFFIX = "-image";
// `detail` level attached to each reference image in buildCodexContent.
const CODEX_REF_DETAIL = "high";
// Image provider configurations
const IMAGE_PROVIDERS = {
openai: {
@@ -37,8 +45,161 @@ const IMAGE_PROVIDERS = {
baseUrl: "https://api-inference.huggingface.co/models",
format: "huggingface",
},
codex: {
baseUrl: CODEX_RESPONSES_URL,
format: "codex",
stream: true,
},
};
// Decode codex chatgpt account id from idToken if not stored.
// Reads the JWT payload claim `"https://api.openai.com/auth".chatgpt_account_id`;
// returns null for anything that is not a well-formed three-segment JWT.
function decodeCodexAccountId(idToken) {
  const segments = String(idToken || "").split(".");
  if (segments.length !== 3) return null;
  try {
    // base64url -> base64, then restore the stripped "=" padding
    const b64 = segments[1].replace(/-/g, "+").replace(/_/g, "/");
    const padded = b64.padEnd(b64.length + ((4 - (b64.length % 4)) % 4), "=");
    const claims = JSON.parse(Buffer.from(padded, "base64").toString("utf8"));
    return claims?.["https://api.openai.com/auth"]?.chatgpt_account_id || null;
  } catch {
    return null;
  }
}
// Strip "-image" suffix to get the underlying chat model
// (e.g. "gpt-5.2-image" -> "gpt-5.2"); other ids pass through untouched.
function stripCodexImageModel(model) {
  if (!model.endsWith(CODEX_MODEL_SUFFIX)) return model;
  return model.slice(0, model.length - CODEX_MODEL_SUFFIX.length);
}
// Normalize a single ref image input to a data URL.
// Existing data: URLs and http(s) URLs pass through unchanged; any other
// non-empty string is assumed to be raw base64 PNG and wrapped accordingly.
// Non-strings and empty strings yield null.
function toCodexDataUrl(input) {
  if (typeof input !== "string" || input === "") return null;
  if (/^(?:data:image\/|https?:\/\/)/i.test(input)) return input;
  return `data:image/png;base64,${input}`;
}
// Build content array with optional reference images, mirroring codex-imagen tagging.
// Each ref becomes a `<image name=imageN>` ... `</image>` text/image triplet,
// followed by the user prompt as the final input_text entry.
function buildCodexContent(prompt, refs) {
  const tagged = refs.flatMap((url, i) => [
    { type: "input_text", text: `<image name=image${i + 1}>` },
    { type: "input_image", image_url: url, detail: CODEX_REF_DETAIL },
    { type: "input_text", text: "</image>" },
  ]);
  return [...tagged, { type: "input_text", text: prompt }];
}
// Parse Codex SSE stream, log progress, return final base64 image.
// Optional callbacks let caller forward events to client (SSE pipe):
// - callbacks.onProgress({ stage, bytesReceived }) — at most one call per 200ms.
// - callbacks.onPartialImage({ b64_json, index }) — for each partial_image event.
// Returns the base64 string from the last completed image_generation_call item,
// or null if the stream ended without one.
async function parseCodexImageStream(response, log, callbacks = {}) {
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";                // unconsumed SSE text carried across chunks
  let imageB64 = null;            // last completed image result seen so far
  let lastEvent = null;           // dedupe: log each stage transition only once
  let bytesReceived = 0;
  let lastProgressLogMs = 0;      // timestamp of last onProgress call (throttle)
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    bytesReceived += value?.byteLength || 0;
    // stream:true keeps multi-byte UTF-8 sequences split across chunks intact
    buffer += decoder.decode(value, { stream: true });
    // SSE events separated by blank line
    let sepIdx;
    while ((sepIdx = buffer.indexOf("\n\n")) !== -1) {
      const block = buffer.slice(0, sepIdx);
      buffer = buffer.slice(sepIdx + 2);
      const lines = block.split("\n");
      let eventName = null;
      let dataStr = "";
      for (const line of lines) {
        if (line.startsWith("event:")) eventName = line.slice(6).trim();
        // NOTE(review): multi-line `data:` fields are concatenated without the
        // SSE spec's "\n" joiner; fine for single-line JSON payloads — confirm
        // Codex never splits a data field across lines.
        else if (line.startsWith("data:")) dataStr += line.slice(5).trim();
      }
      if (!eventName) continue;
      if (eventName !== lastEvent) {
        log?.info?.("IMAGE", `codex progress: ${eventName}`);
        lastEvent = eventName;
      }
      // Notify caller about progress (throttled to ~5/s to avoid flooding)
      const now = Date.now();
      if (callbacks.onProgress && now - lastProgressLogMs > 200) {
        lastProgressLogMs = now;
        callbacks.onProgress({ stage: eventName, bytesReceived });
      }
      if (eventName === "response.image_generation_call.partial_image" && dataStr) {
        try {
          const data = JSON.parse(dataStr);
          if (callbacks.onPartialImage && data?.partial_image_b64) {
            callbacks.onPartialImage({ b64_json: data.partial_image_b64, index: data.partial_image_index });
          }
        } catch {} // tolerate malformed partial payloads; final image may still arrive
      }
      if (eventName === "response.output_item.done" && dataStr) {
        try {
          const data = JSON.parse(dataStr);
          const item = data?.item;
          if (item?.type === "image_generation_call" && item.result) {
            imageB64 = item.result;
          }
        } catch {} // ignore unparsable output items
      }
    }
  }
  return imageB64;
}
// Build SSE Response that pipes codex progress + partial + done events to client.
// Events emitted on the outgoing stream (see `send` calls below):
// - "progress":      { stage, bytesReceived }
// - "partial_image": { b64_json, index }
// - "done":          { created, data: [{ b64_json }] } — onSuccess runs first
// - "error":         { message }
// The stream is always closed, even when parsing fails.
function buildCodexSseResponse(providerResponse, log, onSuccess) {
  const stream = new ReadableStream({
    async start(controller) {
      const enc = new TextEncoder();
      // Serialize one SSE frame: "event: <name>\ndata: <json>\n\n"
      const send = (event, data) => {
        controller.enqueue(enc.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`));
      };
      try {
        const b64 = await parseCodexImageStream(providerResponse, log, {
          onProgress: (info) => send("progress", info),
          onPartialImage: (info) => send("partial_image", info),
        });
        if (!b64) {
          send("error", { message: "Codex did not return an image. Account may not be entitled (Plus/Pro required)." });
        } else {
          // Run the caller's success hook before signalling completion.
          if (onSuccess) await onSuccess();
          send("done", {
            created: Math.floor(Date.now() / 1000),
            data: [{ b64_json: b64 }],
          });
        }
      } catch (err) {
        send("error", { message: err?.message || "Stream failed" });
      } finally {
        controller.close();
      }
    },
  });
  return new Response(stream, {
    headers: {
      // X-Accel-Buffering: no — ask reverse proxies not to buffer the SSE stream
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
      "Access-Control-Allow-Origin": "*",
    },
  });
}
/**
* Build image generation URL
*/
@@ -54,6 +215,8 @@ function buildImageUrl(provider, model, credentials) {
}
case "huggingface":
return `${config.baseUrl}/${model}`;
case "codex":
return CODEX_RESPONSES_URL;
default:
return config.baseUrl;
}
@@ -69,6 +232,23 @@ function buildImageHeaders(provider, credentials) {
return headers;
}
if (provider === "codex") {
const accountId =
credentials?.providerSpecificData?.chatgptAccountId ||
decodeCodexAccountId(credentials?.idToken);
return {
"accept": "text/event-stream, application/json",
"authorization": `Bearer ${credentials?.accessToken || ""}`,
"chatgpt-account-id": accountId || "",
"content-type": "application/json",
"originator": CODEX_ORIGINATOR,
"session_id": randomUUID(),
"user-agent": CODEX_USER_AGENT,
"version": CODEX_VERSION,
"x-client-request-id": randomUUID(),
};
}
if (provider === "openrouter") {
headers["Authorization"] = `Bearer ${credentials?.apiKey || credentials?.accessToken}`;
headers["HTTP-Referer"] = "https://endpoint-proxy.local";
@@ -92,9 +272,28 @@ function buildImageHeaders(provider, credentials) {
* Build request body based on provider format
*/
function buildImageBody(provider, model, body) {
const { prompt, n = 1, size = "1024x1024", quality, style, response_format } = body;
const { prompt, n = 1, size = "1024x1024", quality, style, response_format, image, images } = body;
switch (provider) {
case "codex": {
const refs = [];
if (Array.isArray(images)) images.forEach((i) => { const u = toCodexDataUrl(i); if (u) refs.push(u); });
const single = toCodexDataUrl(image);
if (single) refs.push(single);
return {
model: stripCodexImageModel(model),
instructions: "",
input: [{ type: "message", role: "user", content: buildCodexContent(prompt, refs) }],
tools: [{ type: "image_generation", output_format: "png" }],
tool_choice: "auto",
parallel_tool_calls: false,
prompt_cache_key: randomUUID(),
stream: true,
store: false,
reasoning: null,
};
}
case "gemini":
return {
contents: [{ parts: [{ text: prompt }] }],
@@ -204,6 +403,7 @@ export async function handleImageGenerationCore({
modelInfo,
credentials,
log,
streamToClient = false,
onCredentialsRefreshed,
onRequestSuccess,
}) {
@@ -285,7 +485,6 @@ export async function handleImageGenerationCore({
let responseBody;
try {
// HuggingFace returns binary image data
if (provider === "huggingface") {
const buffer = await providerResponse.arrayBuffer();
const base64 = Buffer.from(buffer).toString("base64");
@@ -293,6 +492,25 @@ export async function handleImageGenerationCore({
created: Math.floor(Date.now() / 1000),
data: [{ b64_json: base64 }],
};
} else if (provider === "codex") {
// SSE pipe to client (progress + partial_image + done)
if (streamToClient) {
return {
success: true,
response: buildCodexSseResponse(providerResponse, log, onRequestSuccess),
};
}
const b64 = await parseCodexImageStream(providerResponse, log);
if (!b64) {
return createErrorResult(
HTTP_STATUS.BAD_GATEWAY,
"Codex did not return an image. Account may not be entitled (Plus/Pro required)."
);
}
responseBody = {
created: Math.floor(Date.now() / 1000),
data: [{ b64_json: b64 }],
};
} else {
responseBody = await providerResponse.json();
}