fix(codex): await image URL fetches before sending to upstream (closes #575)

Remote HTTP(S) image URLs are fetched and inlined as base64 data URIs
in a new prefetchImages() step run before super.execute(), so the body
sent to Codex contains resolved image bytes instead of URLs the backend
cannot access.

Scope is limited to the Codex executor — base executor and other
providers are untouched.

Co-authored-by: anuragg-saxenaa <anuragg.saxenaa@gmail.com>
Made-with: Cursor
This commit is contained in:
anuragg-saxenaa
2026-04-17 12:14:13 +07:00
committed by decolua
parent 6e8aaab299
commit d0ace2a3cf
3 changed files with 210 additions and 16 deletions

View File

@@ -3,6 +3,7 @@ import { BaseExecutor } from "./base.js";
import { CODEX_DEFAULT_INSTRUCTIONS } from "../config/codexInstructions.js";
import { PROVIDERS } from "../config/providers.js";
import { normalizeResponsesInput } from "../translator/helpers/responsesApiHelper.js";
import { fetchImageAsBase64 } from "../translator/helpers/imageHelper.js";
import { getConsistentMachineId } from "../../src/shared/utils/machineId.js";
// In-memory map: hash(machineId + first assistant content) → { sessionId, lastUsed }
@@ -93,7 +94,36 @@ export class CodexExecutor extends BaseExecutor {
}
/**
* Transform request before sending - inject default instructions if missing
* Prefetch remote image URLs and inline them as base64 data URIs.
* Runs before execute() because Codex backend cannot fetch remote images.
* Mutates body.input in place.
*/
async prefetchImages(body) {
if (!Array.isArray(body?.input)) return;
for (const item of body.input) {
if (!Array.isArray(item.content)) continue;
const pending = item.content.map(async (c) => {
if (c.type !== "image_url") return c;
const url = typeof c.image_url === "string" ? c.image_url : c.image_url?.url;
const detail = c.image_url?.detail || "auto";
if (!url) return c;
if (url.startsWith("data:")) return { type: "input_image", image_url: url, detail };
const fetched = await fetchImageAsBase64(url, { timeoutMs: 15000 });
return { type: "input_image", image_url: fetched?.url || url, detail };
});
item.content = await Promise.all(pending);
}
}
async execute(args) {
// Fetch remote images before the synchronous transform/execute pipeline
await this.prefetchImages(args.body);
return super.execute(args);
}
/**
* Transform request before sending - inject default instructions if missing.
* Image fetching is handled separately in prefetchImages() so this stays sync.
*/
transformRequest(model, body, stream, credentials) {
this._isCompact = !!body._compact;
@@ -109,21 +139,6 @@ export class CodexExecutor extends BaseExecutor {
body.input = [{ type: "message", role: "user", content: [{ type: "input_text", text: "..." }] }];
}
// Normalize image content: image_url → input_image (Responses API format)
if (Array.isArray(body.input)) {
for (const item of body.input) {
if (Array.isArray(item.content)) {
item.content = item.content.map(c => {
if (c.type === "image_url") {
const url = typeof c.image_url === "string" ? c.image_url : c.image_url?.url;
return { type: "input_image", image_url: url, detail: c.image_url?.detail || "auto" };
}
return c;
});
}
}
}
// Ensure streaming is enabled (Codex API requires it)
body.stream = true;

View File

@@ -0,0 +1,34 @@
/**
 * Fetch a remote image URL and return it as a base64 data URI.
 * Used when upstream providers (Codex, etc.) require inline base64 images
 * instead of remote URLs they cannot fetch.
 *
 * Best-effort: every failure (unsupported or non-string URL, non-2xx
 * response, network error, timeout, caller abort) resolves to null instead
 * of throwing.
 *
 * @param {string} imageUrl - HTTP(S) URL of the image (scheme is matched case-insensitively)
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - caller signal; aborting it cancels the download
 * @param {number} [options.timeoutMs=10000] - upper bound for the whole download
 *   (headers and body); enforced even when a caller signal is supplied
 * @returns {Promise<{url: string, mimeType: string}|null>}
 */
export async function fetchImageAsBase64(imageUrl, options = {}) {
  const { signal, timeoutMs = 10000 } = options ?? {};

  if (typeof imageUrl !== "string" || !/^https?:\/\//i.test(imageUrl)) {
    return null;
  }
  if (signal?.aborted) return null;

  // One internal controller is driven by both the timeout and the caller's
  // signal, so supplying a signal no longer disables the timeout.
  const controller = new AbortController();
  const onCallerAbort = () => controller.abort();
  signal?.addEventListener("abort", onCallerAbort, { once: true });
  const timeout = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch(imageUrl, { signal: controller.signal });
    if (!response.ok) return null;

    // Drop Content-Type parameters ("image/png; charset=binary") so they do
    // not end up inside the data URI.
    const declaredType = response.headers.get("Content-Type") || "";
    const mimeType = declaredType.split(";")[0].trim().toLowerCase() || "image/jpeg";

    const arrayBuffer = await response.arrayBuffer();
    const base64 = Buffer.from(arrayBuffer).toString("base64");
    return { url: `data:${mimeType};base64,${base64}`, mimeType };
  } catch {
    // Deliberate best-effort: callers fall back to the original URL on null.
    return null;
  } finally {
    clearTimeout(timeout);
    signal?.removeEventListener("abort", onCallerAbort);
  }
}

View File

@@ -0,0 +1,145 @@
/**
* Codex executor: verify remote image URLs are fetched and inlined as
* base64 data URIs BEFORE the request body reaches the upstream API.
*
* Covers bug #575:
* - prefetchImages must await async image fetches
* - execute() must run prefetchImages before super.execute so the body
* sent to upstream contains base64 data, not remote URLs
*/
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import { CodexExecutor } from "../../open-sse/executors/codex.js";
import * as proxyFetchModule from "../../open-sse/utils/proxyFetch.js";
// Size of the synthetic image payload served by the mocked fetch (1024 * 1024 bytes).
const IMAGE_1MB_BYTES = 1024 * 1024;
// Remote image URL placed in request bodies; tests replace global.fetch before using it.
const REMOTE_URL = "https://example.com/big.jpg";
// Already-inlined image, used to check that data URIs pass through without a fetch.
const DATA_URI = "data:image/png;base64,iVBORw0KGgo=";
/**
 * Build a deterministic fake image payload: byte i holds the low 8 bits of i.
 *
 * @param {number} sizeBytes - length of the payload in bytes
 * @returns {ArrayBuffer} buffer of exactly sizeBytes bytes
 */
function makeImageBuffer(sizeBytes) {
  const pattern = new Uint8Array(sizeBytes).map((_, index) => index & 0xff);
  return pattern.buffer;
}
/**
 * Build a minimal stand-in for a successful fetch Response carrying an image.
 * Only the members read by the code under test are provided: `ok`,
 * `headers.get` and `arrayBuffer`.
 *
 * @param {number} sizeBytes - size of the generated image payload
 * @param {string} [mimeType="image/jpeg"] - value reported for the Content-Type header
 * @returns {{ok: boolean, headers: {get: Function}, arrayBuffer: Function}}
 */
function mockImageFetch(sizeBytes, mimeType = "image/jpeg") {
  const headers = {
    // Exact-case lookup: every header other than Content-Type reads as null.
    get(name) {
      if (name === "Content-Type") return mimeType;
      return null;
    },
  };

  return {
    ok: true,
    headers,
    async arrayBuffer() {
      return makeImageBuffer(sizeBytes);
    },
  };
}
describe("CodexExecutor image handling", () => {
  // Wrap content parts in the single-user-message request shape every case uses.
  const bodyWith = (...parts) => ({ input: [{ role: "user", content: parts }] });

  // Locate the converted image part inside the first input item.
  const firstInputImage = (payload) =>
    payload.input[0].content.find((part) => part.type === "input_image");

  let savedFetch;

  // global.fetch is swapped per test; keep the real one so it can be restored.
  beforeEach(() => {
    savedFetch = global.fetch;
  });

  afterEach(() => {
    global.fetch = savedFetch;
    vi.restoreAllMocks();
  });

  it("fetches 1MB remote image and inlines it as base64 data URI", async () => {
    global.fetch = vi.fn(async () => mockImageFetch(IMAGE_1MB_BYTES));
    const executor = new CodexExecutor();
    const body = bodyWith(
      { type: "input_text", text: "describe this" },
      { type: "image_url", image_url: { url: REMOTE_URL, detail: "high" } },
    );

    await executor.prefetchImages(body);

    const image = firstInputImage(body);
    expect(image, "input_image block must be present after prefetch").toBeDefined();
    expect(image.image_url.startsWith("data:image/jpeg;base64,")).toBe(true);
    expect(image.detail).toBe("high");

    // Decode the payload to prove the full image survived the round trip.
    const [, encoded] = image.image_url.split(",");
    expect(Buffer.from(encoded, "base64").length).toBe(IMAGE_1MB_BYTES);
    expect(global.fetch).toHaveBeenCalledTimes(1);
  });

  it("passes through existing data URIs without calling fetch", async () => {
    global.fetch = vi.fn();
    const executor = new CodexExecutor();
    const body = bodyWith({ type: "image_url", image_url: { url: DATA_URI } });

    await executor.prefetchImages(body);

    expect(firstInputImage(body).image_url).toBe(DATA_URI);
    expect(global.fetch).not.toHaveBeenCalled();
  });

  it("falls back to original URL when remote fetch fails", async () => {
    global.fetch = vi.fn(async () => {
      throw new Error("network down");
    });
    const executor = new CodexExecutor();
    const body = bodyWith({ type: "image_url", image_url: { url: REMOTE_URL } });

    await executor.prefetchImages(body);

    expect(firstInputImage(body).image_url).toBe(REMOTE_URL);
  });

  it("execute() prefetches images before sending to upstream", async () => {
    global.fetch = vi.fn(async () => mockImageFetch(IMAGE_1MB_BYTES));

    // Capture the serialized body handed to the upstream transport.
    let sentBody = null;
    vi.spyOn(proxyFetchModule, "proxyAwareFetch").mockImplementation(async (url, init) => {
      sentBody = init.body;
      return { ok: true, status: 200, headers: new Map() };
    });

    const executor = new CodexExecutor();
    const body = bodyWith({ type: "image_url", image_url: { url: REMOTE_URL } });

    await executor.execute({
      model: "gpt-5.3-codex",
      body,
      stream: true,
      credentials: { accessToken: "test" },
    });

    expect(sentBody).toBeTypeOf("string");
    expect(sentBody).not.toBe("{}");

    const image = firstInputImage(JSON.parse(sentBody));
    expect(image.image_url.startsWith("data:image/jpeg;base64,")).toBe(true);
  });
});