mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
fix(codex): await image URL fetches before sending to upstream (closes #575)
Remote HTTP(S) image URLs are fetched and inlined as base64 data URIs in a new prefetchImages() step run before super.execute(), so the body sent to Codex contains resolved image bytes instead of URLs the backend cannot access. Scope is limited to the Codex executor — base executor and other providers are untouched. Co-authored-by: anuragg-saxenaa <anuragg.saxenaa@gmail.com> Made-with: Cursor
This commit is contained in:
@@ -3,6 +3,7 @@ import { BaseExecutor } from "./base.js";
|
|||||||
import { CODEX_DEFAULT_INSTRUCTIONS } from "../config/codexInstructions.js";
|
import { CODEX_DEFAULT_INSTRUCTIONS } from "../config/codexInstructions.js";
|
||||||
import { PROVIDERS } from "../config/providers.js";
|
import { PROVIDERS } from "../config/providers.js";
|
||||||
import { normalizeResponsesInput } from "../translator/helpers/responsesApiHelper.js";
|
import { normalizeResponsesInput } from "../translator/helpers/responsesApiHelper.js";
|
||||||
|
import { fetchImageAsBase64 } from "../translator/helpers/imageHelper.js";
|
||||||
import { getConsistentMachineId } from "../../src/shared/utils/machineId.js";
|
import { getConsistentMachineId } from "../../src/shared/utils/machineId.js";
|
||||||
|
|
||||||
// In-memory map: hash(machineId + first assistant content) → { sessionId, lastUsed }
|
// In-memory map: hash(machineId + first assistant content) → { sessionId, lastUsed }
|
||||||
@@ -93,7 +94,36 @@ export class CodexExecutor extends BaseExecutor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Transform request before sending - inject default instructions if missing
|
* Prefetch remote image URLs and inline them as base64 data URIs.
|
||||||
|
* Runs before execute() because Codex backend cannot fetch remote images.
|
||||||
|
* Mutates body.input in place.
|
||||||
|
*/
|
||||||
|
async prefetchImages(body) {
|
||||||
|
if (!Array.isArray(body?.input)) return;
|
||||||
|
for (const item of body.input) {
|
||||||
|
if (!Array.isArray(item.content)) continue;
|
||||||
|
const pending = item.content.map(async (c) => {
|
||||||
|
if (c.type !== "image_url") return c;
|
||||||
|
const url = typeof c.image_url === "string" ? c.image_url : c.image_url?.url;
|
||||||
|
const detail = c.image_url?.detail || "auto";
|
||||||
|
if (!url) return c;
|
||||||
|
if (url.startsWith("data:")) return { type: "input_image", image_url: url, detail };
|
||||||
|
const fetched = await fetchImageAsBase64(url, { timeoutMs: 15000 });
|
||||||
|
return { type: "input_image", image_url: fetched?.url || url, detail };
|
||||||
|
});
|
||||||
|
item.content = await Promise.all(pending);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async execute(args) {
|
||||||
|
// Fetch remote images before the synchronous transform/execute pipeline
|
||||||
|
await this.prefetchImages(args.body);
|
||||||
|
return super.execute(args);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Transform request before sending - inject default instructions if missing.
|
||||||
|
* Image fetching is handled separately in prefetchImages() so this stays sync.
|
||||||
*/
|
*/
|
||||||
transformRequest(model, body, stream, credentials) {
|
transformRequest(model, body, stream, credentials) {
|
||||||
this._isCompact = !!body._compact;
|
this._isCompact = !!body._compact;
|
||||||
@@ -109,21 +139,6 @@ export class CodexExecutor extends BaseExecutor {
|
|||||||
body.input = [{ type: "message", role: "user", content: [{ type: "input_text", text: "..." }] }];
|
body.input = [{ type: "message", role: "user", content: [{ type: "input_text", text: "..." }] }];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Normalize image content: image_url → input_image (Responses API format)
|
|
||||||
if (Array.isArray(body.input)) {
|
|
||||||
for (const item of body.input) {
|
|
||||||
if (Array.isArray(item.content)) {
|
|
||||||
item.content = item.content.map(c => {
|
|
||||||
if (c.type === "image_url") {
|
|
||||||
const url = typeof c.image_url === "string" ? c.image_url : c.image_url?.url;
|
|
||||||
return { type: "input_image", image_url: url, detail: c.image_url?.detail || "auto" };
|
|
||||||
}
|
|
||||||
return c;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure streaming is enabled (Codex API requires it)
|
// Ensure streaming is enabled (Codex API requires it)
|
||||||
body.stream = true;
|
body.stream = true;
|
||||||
|
|
||||||
|
|||||||
34
open-sse/translator/helpers/imageHelper.js
Normal file
34
open-sse/translator/helpers/imageHelper.js
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
/**
 * Fetch a remote image URL and return it as a base64 data URI.
 * Used when upstream providers (Codex, etc.) require inline base64 images
 * instead of remote URLs they cannot fetch.
 *
 * The request is aborted when `timeoutMs` elapses OR when the caller's
 * `signal` aborts, whichever happens first. Never throws for bad input or
 * network errors: every failure path resolves to null.
 *
 * @param {string} imageUrl - HTTP(S) URL of the image
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - optional caller cancellation signal
 * @param {number} [options.timeoutMs=10000] - abort the download after this many ms
 * @returns {Promise<{url: string, mimeType: string}|null>} data URI and the
 *   reported Content-Type ("image/jpeg" when the header is absent), or null
 *   when the URL is not http(s), the response is not ok, or the fetch fails.
 */
export async function fetchImageAsBase64(imageUrl, options = {}) {
  const { signal, timeoutMs = 10000 } = options;

  // Reject anything that is not an http(s) URL string.
  if (typeof imageUrl !== "string") return null;
  if (!imageUrl.startsWith("http://") && !imageUrl.startsWith("https://")) return null;

  // Caller already cancelled: do not start a request at all.
  if (signal?.aborted) return null;

  // A single internal controller is driven by both the timeout and the
  // caller's signal, so passing a signal no longer disables the timeout.
  const controller = new AbortController();
  const abort = () => controller.abort();
  signal?.addEventListener("abort", abort, { once: true });
  const timeout = setTimeout(abort, timeoutMs);

  try {
    const response = await fetch(imageUrl, { signal: controller.signal });
    if (!response.ok) return null;

    const mimeType = response.headers.get("Content-Type") || "image/jpeg";
    const arrayBuffer = await response.arrayBuffer();
    const base64 = Buffer.from(arrayBuffer).toString("base64");
    return { url: `data:${mimeType};base64,${base64}`, mimeType };
  } catch {
    // Network error, abort, or body read failure: documented as null.
    return null;
  } finally {
    clearTimeout(timeout);
    signal?.removeEventListener("abort", abort);
  }
}
|
||||||
145
tests/unit/codex-image-fetch.test.js
Normal file
145
tests/unit/codex-image-fetch.test.js
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
/**
|
||||||
|
* Codex executor: verify remote image URLs are fetched and inlined as
|
||||||
|
* base64 data URIs BEFORE the request body reaches the upstream API.
|
||||||
|
*
|
||||||
|
* Covers bug #575:
|
||||||
|
* - prefetchImages must await async image fetches
|
||||||
|
* - execute() must run prefetchImages before super.execute so the body
|
||||||
|
* sent to upstream contains base64 data, not remote URLs
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
||||||
|
import { CodexExecutor } from "../../open-sse/executors/codex.js";
|
||||||
|
import * as proxyFetchModule from "../../open-sse/utils/proxyFetch.js";
|
||||||
|
|
||||||
|
const IMAGE_1MB_BYTES = 1024 * 1024;
|
||||||
|
const REMOTE_URL = "https://example.com/big.jpg";
|
||||||
|
const DATA_URI = "data:image/png;base64,iVBORw0KGgo=";
|
||||||
|
|
||||||
|
// Build an ArrayBuffer of `sizeBytes` bytes holding the repeating pattern
// 0, 1, ..., 255, 0, 1, ... so the payload is deterministic.
function makeImageBuffer(sizeBytes) {
  const pattern = new Uint8Array(sizeBytes).map((_, offset) => offset & 0xff);
  return pattern.buffer;
}
|
||||||
|
|
||||||
|
// Minimal stand-in for a successful fetch Response: exposes `ok`, a
// `headers.get` that answers only the exact "Content-Type" key, and an
// `arrayBuffer()` producing `sizeBytes` bytes of generated image data.
function mockImageFetch(sizeBytes, mimeType = "image/jpeg") {
  const headers = {
    get(name) {
      return name === "Content-Type" ? mimeType : null;
    },
  };

  return {
    ok: true,
    headers,
    async arrayBuffer() {
      return makeImageBuffer(sizeBytes);
    },
  };
}
|
||||||
|
|
||||||
|
describe("CodexExecutor image handling", () => {
  // Wrap the given content parts in a single user message request body.
  const userBody = (...parts) => ({ input: [{ role: "user", content: parts }] });

  // First `input_image` part of the first input item, or undefined.
  const firstInputImage = (payload) =>
    payload.input[0].content.find((part) => part.type === "input_image");

  let savedFetch;

  beforeEach(() => {
    savedFetch = global.fetch;
  });

  afterEach(() => {
    // Undo the per-test fetch replacement and any spies.
    global.fetch = savedFetch;
    vi.restoreAllMocks();
  });

  it("fetches 1MB remote image and inlines it as base64 data URI", async () => {
    global.fetch = vi.fn(async () => mockImageFetch(IMAGE_1MB_BYTES));

    const executor = new CodexExecutor();
    const body = userBody(
      { type: "input_text", text: "describe this" },
      { type: "image_url", image_url: { url: REMOTE_URL, detail: "high" } },
    );

    await executor.prefetchImages(body);

    const image = firstInputImage(body);
    expect(image, "input_image block must be present after prefetch").toBeDefined();
    expect(image.image_url.startsWith("data:image/jpeg;base64,")).toBe(true);
    expect(image.detail).toBe("high");

    // The decoded payload must be the full image, not a truncated copy.
    const [, encoded] = image.image_url.split(",");
    expect(Buffer.from(encoded, "base64").length).toBe(IMAGE_1MB_BYTES);
    expect(global.fetch).toHaveBeenCalledTimes(1);
  });

  it("passes through existing data URIs without calling fetch", async () => {
    global.fetch = vi.fn();

    const executor = new CodexExecutor();
    const body = userBody({ type: "image_url", image_url: { url: DATA_URI } });

    await executor.prefetchImages(body);

    expect(firstInputImage(body).image_url).toBe(DATA_URI);
    expect(global.fetch).not.toHaveBeenCalled();
  });

  it("falls back to original URL when remote fetch fails", async () => {
    global.fetch = vi.fn(async () => { throw new Error("network down"); });

    const executor = new CodexExecutor();
    const body = userBody({ type: "image_url", image_url: { url: REMOTE_URL } });

    await executor.prefetchImages(body);

    expect(firstInputImage(body).image_url).toBe(REMOTE_URL);
  });

  it("execute() prefetches images before sending to upstream", async () => {
    global.fetch = vi.fn(async () => mockImageFetch(IMAGE_1MB_BYTES));

    // Capture the serialized body handed to the upstream transport.
    let sentBody = null;
    vi.spyOn(proxyFetchModule, "proxyAwareFetch").mockImplementation(async (url, init) => {
      sentBody = init.body;
      return { ok: true, status: 200, headers: new Map() };
    });

    const executor = new CodexExecutor();
    const body = userBody({ type: "image_url", image_url: { url: REMOTE_URL } });

    await executor.execute({
      model: "gpt-5.3-codex",
      body,
      stream: true,
      credentials: { accessToken: "test" },
    });

    expect(sentBody).toBeTypeOf("string");
    expect(sentBody).not.toBe("{}");

    const image = firstInputImage(JSON.parse(sentBody));
    expect(image.image_url.startsWith("data:image/jpeg;base64,")).toBe(true);
  });
});
|
||||||
Reference in New Issue
Block a user