mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
feat: add /v1/embeddings endpoint (OpenAI-compatible) (#146)
* feat: implement /v1/embeddings endpoint (#117) Add OpenAI-compatible POST /v1/embeddings endpoint that routes through the existing provider credential + fallback infrastructure. Changes: - open-sse/handlers/embeddingsCore.js: core handler (handleEmbeddingsCore) * Validates input (string or array), encoding_format * Builds provider-specific URL and headers for openai, openrouter, and openai-compatible providers * Handles 401/403 token refresh via executor.refreshCredentials * Returns normalized OpenAI-format response { object: 'list', data, model, usage } - cloud/src/handlers/embeddings.js: cloud Worker handler (handleEmbeddings) * Auth + machineId resolution identical to handleChat * Provider credential fallback loop with rate-limit tracking - cloud/src/index.js: wire new routes * POST /v1/embeddings (new format — machineId from API key) * POST /{machineId}/v1/embeddings (old format — machineId from URL) * test: add unit tests for /v1/embeddings endpoint - Setup vitest as test framework (tests/ directory) - embeddingsCore.test.js (36 tests): - buildEmbeddingsBody: single string, array, encoding_format, default float - buildEmbeddingsUrl: openai, openrouter, openai-compatible-*, unsupported - buildEmbeddingsHeaders: per-provider headers, accessToken fallback - handleEmbeddingsCore: input validation, success path, provider errors, network errors, invalid JSON, token refresh 401 handling - embeddings.cloud.test.js (23 tests): - CORS OPTIONS preflight - Auth: missing/invalid/old-format/wrong key → 401/400 - Body validation: bad JSON, missing model, missing input, bad model → 400 - Happy path: single string, array, delegation, CORS header, machineId override - Rate limiting: all-rate-limited → 429 + Retry-After, no credentials → 400 - Error propagation: non-fallback errors, 429 exhausts accounts Total: 59/59 tests passing Framework: vitest v4.0.18, Node v22.22.0 * feat: add Next.js API route for /v1/embeddings endpoint Wire the embeddings handler into Next.js App 
Router. - src/app/api/v1/embeddings/route.js: Next.js API route (POST + OPTIONS) - src/sse/handlers/embeddings.js: SSE-layer handler mirroring chat.js pattern Uses handleEmbeddingsCore from open-sse/handlers/embeddingsCore.js with the same auth, credential fallback, and token refresh logic as the chat handler. Supports REQUIRE_API_KEY env var, provider fallback loop, and consistent logging.
This commit is contained in:
285
cloud/src/handlers/embeddings.js
Normal file
285
cloud/src/handlers/embeddings.js
Normal file
@@ -0,0 +1,285 @@
|
||||
import { getModelInfoCore } from "open-sse/services/model.js";
|
||||
import { handleEmbeddingsCore } from "open-sse/handlers/embeddingsCore.js";
|
||||
import { errorResponse } from "open-sse/utils/error.js";
|
||||
import {
|
||||
checkFallbackError,
|
||||
isAccountUnavailable,
|
||||
getEarliestRateLimitedUntil,
|
||||
getUnavailableUntil,
|
||||
formatRetryAfter
|
||||
} from "open-sse/services/accountFallback.js";
|
||||
import { HTTP_STATUS } from "open-sse/config/constants.js";
|
||||
import * as log from "../utils/logger.js";
|
||||
import { parseApiKey, extractBearerToken } from "../utils/apiKey.js";
|
||||
import { getMachineData, saveMachineData } from "../services/storage.js";
|
||||
|
||||
/**
 * Handle POST /v1/embeddings and /{machineId}/v1/embeddings requests.
 *
 * Follows the same auth + fallback pattern as handleChat:
 * 1. Resolve machineId (from URL or API key)
 * 2. Validate API key
 * 3. Parse model → provider/model
 * 4. Get provider credentials with fallback loop
 * 5. Delegate to handleEmbeddingsCore (open-sse)
 *
 * @param {Request} request
 * @param {object} env - Cloudflare env bindings
 * @param {object} ctx - Execution context
 * @param {string|null} machineIdOverride - From URL path (old format), or null (new format)
 * @returns {Promise<Response>} Provider response on success, or a JSON error response
 */
export async function handleEmbeddings(request, env, ctx, machineIdOverride = null) {
  // CORS preflight — answered before any auth so browsers can probe the endpoint.
  if (request.method === "OPTIONS") {
    return new Response(null, {
      headers: {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
        "Access-Control-Allow-Headers": "*"
      }
    });
  }

  // Resolve machineId: old-format URLs carry it in the path; new-format keys embed it.
  let machineId = machineIdOverride;

  if (!machineId) {
    const apiKey = extractBearerToken(request);
    if (!apiKey) return errorResponse(HTTP_STATUS.UNAUTHORIZED, "Missing API key");

    const parsed = await parseApiKey(apiKey);
    if (!parsed) return errorResponse(HTTP_STATUS.UNAUTHORIZED, "Invalid API key format");

    if (!parsed.isNewFormat || !parsed.machineId) {
      return errorResponse(
        HTTP_STATUS.BAD_REQUEST,
        "API key does not contain machineId. Use /{machineId}/v1/... endpoint for old format keys."
      );
    }
    machineId = parsed.machineId;
  }

  // Validate API key against the machine's stored keys
  if (!await validateApiKey(request, machineId, env)) {
    return errorResponse(HTTP_STATUS.UNAUTHORIZED, "Invalid API key");
  }

  // Parse body
  let body;
  try {
    body = await request.json();
  } catch {
    return errorResponse(HTTP_STATUS.BAD_REQUEST, "Invalid JSON body");
  }

  const modelStr = body.model;
  if (!modelStr) return errorResponse(HTTP_STATUS.BAD_REQUEST, "Missing model");

  if (!body.input) return errorResponse(HTTP_STATUS.BAD_REQUEST, "Missing required field: input");

  log.info("EMBEDDINGS", `${machineId} | ${modelStr}`);

  // Resolve "provider/model" (honoring per-machine model aliases)
  const data = await getMachineData(machineId, env);
  const modelInfo = await getModelInfoCore(modelStr, data?.modelAliases || {});
  if (!modelInfo.provider) return errorResponse(HTTP_STATUS.BAD_REQUEST, "Invalid model format");

  const { provider, model } = modelInfo;
  log.info("EMBEDDINGS_MODEL", `${provider.toUpperCase()} | ${model}`);

  // Provider credential + fallback loop (mirrors handleChat). On a fallback-able
  // failure the failing connection is excluded and the next account is tried.
  let excludeConnectionId = null;
  let lastError = null;
  let lastStatus = null;

  while (true) {
    const credentials = await getProviderCredentials(machineId, provider, env, excludeConnectionId);

    if (!credentials || credentials.allRateLimited) {
      if (credentials?.allRateLimited) {
        // Every account for this provider is cooling down — tell the client when to retry.
        const retryAfterMs = new Date(credentials.retryAfter).getTime() - Date.now();
        // Guard against an unparsable retryAfter timestamp: RFC 9110 requires
        // Retry-After to be a non-negative integer — never "NaN".
        const retryAfterSec = Number.isFinite(retryAfterMs)
          ? Math.max(Math.ceil(retryAfterMs / 1000), 1)
          : 1;
        const errorMsg = lastError || credentials.lastError || "Unavailable";
        const msg = `[${provider}/${model}] ${errorMsg} (${credentials.retryAfterHuman})`;
        const status = lastStatus || Number(credentials.lastErrorCode) || HTTP_STATUS.SERVICE_UNAVAILABLE;
        log.warn("EMBEDDINGS", `${provider.toUpperCase()} | ${msg}`);
        return new Response(
          JSON.stringify({ error: { message: msg } }),
          {
            status,
            headers: {
              "Content-Type": "application/json",
              "Retry-After": String(retryAfterSec)
            }
          }
        );
      }
      // No usable credentials on the very first pass → configuration problem, not an outage.
      if (!excludeConnectionId) {
        return errorResponse(HTTP_STATUS.BAD_REQUEST, `No credentials for provider: ${provider}`);
      }
      // We fell back at least once and ran out of accounts.
      log.warn("EMBEDDINGS", `${provider.toUpperCase()} | no more accounts`);
      return new Response(
        JSON.stringify({ error: lastError || "All accounts unavailable" }),
        {
          status: lastStatus || HTTP_STATUS.SERVICE_UNAVAILABLE,
          headers: { "Content-Type": "application/json" }
        }
      );
    }

    log.debug("EMBEDDINGS", `account=${credentials.id}`, { provider });

    const result = await handleEmbeddingsCore({
      body,
      modelInfo: { provider, model },
      credentials,
      log,
      // Persist refreshed OAuth tokens so later requests skip the refresh.
      onCredentialsRefreshed: async (newCreds) => {
        await updateCredentials(machineId, credentials.id, newCreds, env);
      },
      // A success clears any stale error/cooldown markers on the account.
      onRequestSuccess: async () => {
        await clearAccountError(machineId, credentials.id, credentials, env);
      }
    });

    if (result.success) return result.response;

    const { shouldFallback } = checkFallbackError(result.status, result.error);

    if (shouldFallback) {
      log.warn("EMBEDDINGS_FALLBACK", `${provider.toUpperCase()} | ${credentials.id} | ${result.status}`);
      await markAccountUnavailable(machineId, credentials.id, result.status, result.error, env);
      excludeConnectionId = credentials.id;
      lastError = result.error;
      lastStatus = result.status;
      continue;
    }

    // Non-fallback errors (e.g. bad request) propagate to the client as-is.
    return result.response;
  }
}
|
||||
|
||||
// ─── Helpers (same as chat.js) ───────────────────────────────────────────────
|
||||
|
||||
/**
 * Check the request's Bearer token against the machine's stored API keys.
 *
 * @param {Request} request
 * @param {string} machineId
 * @param {object} env - Cloudflare env bindings
 * @returns {Promise<boolean>} true when the presented key matches a stored key
 */
async function validateApiKey(request, machineId, env) {
  const auth = request.headers.get("Authorization");
  if (!auth?.startsWith("Bearer ")) return false;

  const presentedKey = auth.slice("Bearer ".length);
  const machine = await getMachineData(machineId, env);
  const keys = machine?.apiKeys ?? [];
  return keys.some((entry) => entry.key === presentedKey);
}
|
||||
|
||||
/**
 * Pick the best available credential for `provider` on this machine.
 *
 * Selection: active connections for the provider, minus the excluded one and
 * any currently rate-limited ones, ordered by ascending priority (default 999).
 *
 * @returns one of:
 *  - a flattened credentials object for the winning connection,
 *  - { allRateLimited: true, retryAfter, retryAfterHuman, lastError, lastErrorCode }
 *    when every active connection is cooling down,
 *  - null when the machine has no usable connection at all.
 */
async function getProviderCredentials(machineId, provider, env, excludeConnectionId = null) {
  const data = await getMachineData(machineId, env);
  if (!data?.providers) return null;

  const entries = Object.entries(data.providers);

  const usable = entries
    .filter(([connId, conn]) => {
      const matches = conn.provider === provider && conn.isActive;
      const excluded = excludeConnectionId != null && connId === excludeConnectionId;
      return matches && !excluded && !isAccountUnavailable(conn.rateLimitedUntil);
    })
    .sort(([, a], [, b]) => (a.priority || 999) - (b.priority || 999));

  if (usable.length > 0) {
    const [id, conn] = usable[0];
    return {
      id,
      apiKey: conn.apiKey,
      accessToken: conn.accessToken,
      refreshToken: conn.refreshToken,
      expiresAt: conn.expiresAt,
      projectId: conn.projectId,
      providerSpecificData: conn.providerSpecificData,
      status: conn.status,
      lastError: conn.lastError,
      rateLimitedUntil: conn.rateLimitedUntil
    };
  }

  // Nothing usable — report the soonest-recovering rate-limited account, if any.
  const activeForProvider = entries
    .filter(([, conn]) => conn.provider === provider && conn.isActive)
    .map(([, conn]) => conn);

  const earliest = getEarliestRateLimitedUntil(activeForProvider);
  if (!earliest) return null;

  const soonest = activeForProvider
    .filter((c) => c.rateLimitedUntil && new Date(c.rateLimitedUntil).getTime() > Date.now())
    .sort((a, b) => new Date(a.rateLimitedUntil) - new Date(b.rateLimitedUntil))[0];

  return {
    allRateLimited: true,
    retryAfter: earliest,
    retryAfterHuman: formatRetryAfter(earliest),
    lastError: soonest?.lastError || null,
    lastErrorCode: soonest?.errorCode || null
  };
}
|
||||
|
||||
/**
 * Put a connection into cooldown after a provider failure.
 * Cooldown length and next backoff level come from checkFallbackError.
 *
 * @param {string} machineId
 * @param {string} connectionId
 * @param {number} status - HTTP status of the failed provider call
 * @param {*} errorText - provider error payload (truncated when a string)
 * @param {object} env
 */
async function markAccountUnavailable(machineId, connectionId, status, errorText, env) {
  const data = await getMachineData(machineId, env);
  const conn = data?.providers?.[connectionId];
  if (!conn) return;

  const previousBackoff = conn.backoffLevel || 0;
  const { cooldownMs, newBackoffLevel } = checkFallbackError(status, errorText, previousBackoff);
  const until = getUnavailableUntil(cooldownMs);
  const reason = typeof errorText === "string" ? errorText.slice(0, 100) : "Provider error";
  const now = new Date().toISOString();

  Object.assign(conn, {
    rateLimitedUntil: until,
    status: "unavailable",
    lastError: reason,
    errorCode: status || null,
    lastErrorAt: now,
    backoffLevel: newBackoffLevel ?? previousBackoff,
    updatedAt: now
  });

  await saveMachineData(machineId, data, env);
  log.warn("EMBEDDINGS_ACCOUNT", `${connectionId} | unavailable until ${until}`);
}
|
||||
|
||||
/**
 * Reset a connection's error/rate-limit state after a successful request.
 * No-op when the credentials carried no error markers to begin with, so the
 * common success path skips the extra storage round-trip.
 *
 * @param {string} machineId
 * @param {string} connectionId
 * @param {object} currentCredentials - credentials snapshot used for the request
 * @param {object} env
 */
async function clearAccountError(machineId, connectionId, currentCredentials, env) {
  const { status, lastError, rateLimitedUntil } = currentCredentials;
  if (status !== "unavailable" && !lastError && !rateLimitedUntil) return;

  const data = await getMachineData(machineId, env);
  const conn = data?.providers?.[connectionId];
  if (!conn) return;

  conn.status = "active";
  conn.lastError = null;
  conn.lastErrorAt = null;
  conn.rateLimitedUntil = null;
  conn.backoffLevel = 0;
  conn.updatedAt = new Date().toISOString();

  await saveMachineData(machineId, data, env);
  log.info("EMBEDDINGS_ACCOUNT", `${connectionId} | error cleared`);
}
|
||||
|
||||
/**
 * Persist refreshed OAuth credentials for a connection.
 * accessToken is always overwritten; refreshToken and expiry fields are only
 * written when present in newCredentials (providers may omit them on refresh).
 *
 * @param {string} machineId
 * @param {string} connectionId
 * @param {{accessToken: string, refreshToken?: string, expiresIn?: number}} newCredentials
 * @param {object} env
 */
async function updateCredentials(machineId, connectionId, newCredentials, env) {
  const data = await getMachineData(machineId, env);
  const conn = data?.providers?.[connectionId];
  if (!conn) return;

  conn.accessToken = newCredentials.accessToken;
  if (newCredentials.refreshToken) {
    conn.refreshToken = newCredentials.refreshToken;
  }
  if (newCredentials.expiresIn) {
    const ttlMs = newCredentials.expiresIn * 1000;
    conn.expiresAt = new Date(Date.now() + ttlMs).toISOString();
    conn.expiresIn = newCredentials.expiresIn;
  }
  conn.updatedAt = new Date().toISOString();

  await saveMachineData(machineId, data, env);
  log.debug("EMBEDDINGS_TOKEN", `credentials updated | ${connectionId}`);
}
|
||||
@@ -12,6 +12,7 @@ import { handleVerify } from "./handlers/verify.js";
|
||||
import { handleTestClaude } from "./handlers/testClaude.js";
|
||||
import { handleForward } from "./handlers/forward.js";
|
||||
import { handleForwardRaw } from "./handlers/forwardRaw.js";
|
||||
import { handleEmbeddings } from "./handlers/embeddings.js";
|
||||
import { createLandingPageResponse } from "./services/landingPage.js";
|
||||
|
||||
// Initialize translators at module load (static imports)
|
||||
@@ -115,6 +116,13 @@ const worker = {
|
||||
return addCorsHeaders(response);
|
||||
}
|
||||
|
||||
// New format: /v1/embeddings
|
||||
if (path === "/v1/embeddings" && request.method === "POST") {
|
||||
const response = await handleEmbeddings(request, env, ctx, null);
|
||||
log.response(response.status, Date.now() - startTime);
|
||||
return addCorsHeaders(response);
|
||||
}
|
||||
|
||||
// New format: /v1/responses (OpenAI Responses API - Codex CLI)
|
||||
if (path === "/v1/responses" && request.method === "POST") {
|
||||
const response = await handleChat(request, env, ctx, null);
|
||||
@@ -149,6 +157,14 @@ const worker = {
|
||||
return response;
|
||||
}
|
||||
|
||||
// Machine ID based embeddings endpoint
|
||||
if (path.match(/^\/[^\/]+\/v1\/embeddings$/) && request.method === "POST") {
|
||||
const machineId = path.split("/")[1];
|
||||
const response = await handleEmbeddings(request, env, ctx, machineId);
|
||||
log.response(response.status, Date.now() - startTime);
|
||||
return addCorsHeaders(response);
|
||||
}
|
||||
|
||||
// Machine ID based messages endpoint (Claude format)
|
||||
if (path.match(/^\/[^\/]+\/v1\/messages$/) && request.method === "POST") {
|
||||
const machineId = path.split("/")[1];
|
||||
|
||||
Reference in New Issue
Block a user