diff --git a/open-sse/executors/codex.js b/open-sse/executors/codex.js index e7033623..daf9e712 100644 --- a/open-sse/executors/codex.js +++ b/open-sse/executors/codex.js @@ -3,6 +3,7 @@ import { BaseExecutor } from "./base.js"; import { CODEX_DEFAULT_INSTRUCTIONS } from "../config/codexInstructions.js"; import { PROVIDERS } from "../config/providers.js"; import { normalizeResponsesInput } from "../translator/helpers/responsesApiHelper.js"; +import { fetchImageAsBase64 } from "../translator/helpers/imageHelper.js"; import { getConsistentMachineId } from "../../src/shared/utils/machineId.js"; // In-memory map: hash(machineId + first assistant content) → { sessionId, lastUsed } @@ -93,7 +94,36 @@ export class CodexExecutor extends BaseExecutor { } /** - * Transform request before sending - inject default instructions if missing + * Prefetch remote image URLs and inline them as base64 data URIs. + * Runs before execute() because Codex backend cannot fetch remote images. + * Mutates body.input in place. + */ + async prefetchImages(body) { + if (!Array.isArray(body?.input)) return; + for (const item of body.input) { + if (!Array.isArray(item.content)) continue; + const pending = item.content.map(async (c) => { + if (c.type !== "image_url") return c; + const url = typeof c.image_url === "string" ? c.image_url : c.image_url?.url; + const detail = c.image_url?.detail || "auto"; + if (!url) return c; + if (url.startsWith("data:")) return { type: "input_image", image_url: url, detail }; + const fetched = await fetchImageAsBase64(url, { timeoutMs: 15000 }); + return { type: "input_image", image_url: fetched?.url || url, detail }; + }); + item.content = await Promise.all(pending); + } + } + + async execute(args) { + // Fetch remote images before the synchronous transform/execute pipeline + await this.prefetchImages(args.body); + return super.execute(args); + } + + /** + * Transform request before sending - inject default instructions if missing. 
/**
 * Fetch a remote image URL and return it as a base64 data URI.
 * Used when upstream providers (Codex, etc.) require inline base64 images
 * instead of remote URLs they cannot fetch.
 *
 * Best-effort: resolves to null (never rejects) when the input is not a valid
 * HTTP(S) URL, the response status is not OK, or the request fails, is
 * cancelled, or times out.
 *
 * NOTE(review): if imageUrl can come from untrusted request data, this is a
 * server-side fetch of a caller-chosen address (SSRF surface), and the whole
 * response body is buffered with no size cap. Confirm whether host filtering
 * and a byte limit are needed for this deployment.
 *
 * @param {string} imageUrl - HTTP(S) URL of the image
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - Caller-controlled cancellation.
 * @param {number} [options.timeoutMs] - Abort after this many milliseconds.
 *   Defaults to 10000 when no signal is given. When a signal is given, a
 *   timeout is applied only if timeoutMs is passed explicitly.
 * @returns {Promise<{url: string, mimeType: string}|null>}
 */
export async function fetchImageAsBase64(imageUrl, options = {}) {
  const { signal, timeoutMs } = options ?? {};

  // Reject anything that is not an absolute http(s) URL. Parsing (rather than
  // prefix matching) also accepts upper-case schemes and never throws on
  // non-string input.
  if (typeof imageUrl !== "string") return null;
  let parsed;
  try {
    parsed = new URL(imageUrl);
  } catch {
    return null;
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") return null;

  if (signal?.aborted) return null;

  // A private controller lets the caller's signal and the timeout both cancel
  // the same request; previously the timeout was dropped whenever a signal
  // was supplied.
  const controller = new AbortController();
  const forwardAbort = () => controller.abort();
  signal?.addEventListener("abort", forwardAbort, { once: true });

  const effectiveTimeoutMs = timeoutMs ?? (signal ? undefined : 10000);
  const timer =
    effectiveTimeoutMs === undefined
      ? undefined
      : setTimeout(() => controller.abort(), effectiveTimeoutMs);

  try {
    const response = await fetch(imageUrl, { signal: controller.signal });
    if (!response.ok) return null;

    // Keep only the media type: parameters such as "; charset=binary" do not
    // belong in the data URI and break "data:image/...;base64," prefix checks.
    const rawType = response.headers.get("Content-Type") || "";
    const mimeType = rawType.split(";")[0].trim().toLowerCase() || "image/jpeg";

    const bytes = Buffer.from(await response.arrayBuffer());
    return { url: `data:${mimeType};base64,${bytes.toString("base64")}`, mimeType };
  } catch {
    // Deliberate best-effort: network errors, aborts and timeouts all map to
    // null so the caller can fall back to the original URL.
    return null;
  } finally {
    clearTimeout(timer);
    signal?.removeEventListener("abort", forwardAbort);
  }
}
/**
 * CodexExecutor image handling: remote image URLs must be downloaded and
 * inlined as base64 data URIs before the request body reaches the upstream
 * API, and execute() must do so before delegating to the base executor.
 */
describe("CodexExecutor image handling", () => {
  // Fixtures are kept local to the suite so it reads top to bottom.
  const IMAGE_1MB_BYTES = 1024 * 1024;
  const REMOTE_URL = "https://example.com/big.jpg";
  const DATA_URI = "data:image/png;base64,iVBORw0KGgo=";

  // Deterministic payload: byte i holds the low 8 bits of i.
  const makeImageBuffer = (sizeBytes) =>
    Uint8Array.from({ length: sizeBytes }, (_, i) => i & 0xff).buffer;

  // Minimal stand-in for a successful fetch Response carrying an image.
  const mockImageFetch = (sizeBytes, mimeType = "image/jpeg") => ({
    ok: true,
    headers: { get: (k) => (k === "Content-Type" ? mimeType : null) },
    arrayBuffer: async () => makeImageBuffer(sizeBytes),
  });

  // Request body with a single user item made of the given content parts.
  const userBody = (...parts) => ({ input: [{ role: "user", content: parts }] });

  // First converted image part of the first input item.
  const firstInputImage = (body) =>
    body.input[0].content.find((c) => c.type === "input_image");

  let originalFetch;

  beforeEach(() => {
    originalFetch = global.fetch;
  });

  afterEach(() => {
    global.fetch = originalFetch;
    vi.restoreAllMocks();
  });

  it("fetches 1MB remote image and inlines it as base64 data URI", async () => {
    global.fetch = vi.fn(async () => mockImageFetch(IMAGE_1MB_BYTES));

    const body = userBody(
      { type: "input_text", text: "describe this" },
      { type: "image_url", image_url: { url: REMOTE_URL, detail: "high" } },
    );
    const executor = new CodexExecutor();

    await executor.prefetchImages(body);

    const imgBlock = firstInputImage(body);
    expect(imgBlock, "input_image block must be present after prefetch").toBeDefined();
    expect(imgBlock.image_url.startsWith("data:image/jpeg;base64,")).toBe(true);
    expect(imgBlock.detail).toBe("high");

    // The inlined payload must round-trip to the full image size.
    const [, base64Payload] = imgBlock.image_url.split(",");
    expect(Buffer.from(base64Payload, "base64").length).toBe(IMAGE_1MB_BYTES);
    expect(global.fetch).toHaveBeenCalledTimes(1);
  });

  it("passes through existing data URIs without calling fetch", async () => {
    global.fetch = vi.fn();

    const body = userBody({ type: "image_url", image_url: { url: DATA_URI } });
    const executor = new CodexExecutor();

    await executor.prefetchImages(body);

    expect(firstInputImage(body).image_url).toBe(DATA_URI);
    expect(global.fetch).not.toHaveBeenCalled();
  });

  it("falls back to original URL when remote fetch fails", async () => {
    global.fetch = vi.fn(async () => {
      throw new Error("network down");
    });

    const body = userBody({ type: "image_url", image_url: { url: REMOTE_URL } });
    const executor = new CodexExecutor();

    await executor.prefetchImages(body);

    expect(firstInputImage(body).image_url).toBe(REMOTE_URL);
  });

  it("execute() prefetches images before sending to upstream", async () => {
    global.fetch = vi.fn(async () => mockImageFetch(IMAGE_1MB_BYTES));

    // Capture the serialized body handed to the upstream transport.
    let capturedBodyString = null;
    vi.spyOn(proxyFetchModule, "proxyAwareFetch").mockImplementation(async (url, init) => {
      capturedBodyString = init.body;
      return { ok: true, status: 200, headers: new Map() };
    });

    const body = userBody({ type: "image_url", image_url: { url: REMOTE_URL } });
    const executor = new CodexExecutor();

    await executor.execute({
      model: "gpt-5.3-codex",
      body,
      stream: true,
      credentials: { accessToken: "test" },
    });

    expect(capturedBodyString).toBeTypeOf("string");
    expect(capturedBodyString).not.toBe("{}");

    const imgBlock = firstInputImage(JSON.parse(capturedBodyString));
    expect(imgBlock.image_url.startsWith("data:image/jpeg;base64,")).toBe(true);
  });
});