diff --git a/.gitignore b/.gitignore index 06d10b2d..b1a63e62 100644 --- a/.gitignore +++ b/.gitignore @@ -69,3 +69,4 @@ deploy*.sh ecosystem.config.* scripts/agSniffer/* +gitbooks/* diff --git a/open-sse/config/providerModels.js b/open-sse/config/providerModels.js index cf6f2c58..1e991183 100644 --- a/open-sse/config/providerModels.js +++ b/open-sse/config/providerModels.js @@ -556,6 +556,40 @@ export const PROVIDER_MODELS = { { id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell", type: "image", params: [] }, { id: "stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL Base 1.0", type: "image", params: [] }, ], + "fal-ai": [ + { id: "fal-ai/flux/schnell", name: "FLUX Schnell", type: "image", params: ["n", "size"] }, + { id: "fal-ai/flux/dev", name: "FLUX Dev", type: "image", params: ["n", "size"] }, + { id: "fal-ai/flux-pro/v1.1", name: "FLUX Pro v1.1", type: "image", params: ["n", "size"] }, + { id: "fal-ai/flux-pro/v1.1-ultra", name: "FLUX Pro v1.1 Ultra", type: "image", params: ["n", "size"] }, + { id: "fal-ai/recraft-v3", name: "Recraft V3", type: "image", params: ["n", "size", "style"] }, + { id: "fal-ai/ideogram/v2", name: "Ideogram V2", type: "image", params: ["n", "size", "style"] }, + { id: "fal-ai/stable-diffusion-v35-large", name: "SD 3.5 Large", type: "image", params: ["n", "size"] }, + ], + "stability-ai": [ + { id: "stable-image-ultra", name: "Stable Image Ultra", type: "image", params: ["size"] }, + { id: "stable-image-core", name: "Stable Image Core", type: "image", params: ["size", "style"] }, + { id: "sd3.5-large", name: "Stable Diffusion 3.5 Large", type: "image", params: ["size"] }, + { id: "sd3.5-large-turbo", name: "Stable Diffusion 3.5 Large Turbo", type: "image", params: ["size"] }, + { id: "sd3.5-medium", name: "Stable Diffusion 3.5 Medium", type: "image", params: ["size"] }, + ], + "black-forest-labs": [ + { id: "flux-pro-1.1", name: "FLUX Pro 1.1", type: "image", params: ["n", "size"] }, + { id: "flux-pro-1.1-ultra", name: "FLUX Pro 1.1 Ultra", type: "image", params: ["size"] }, + { id: "flux-pro", name: "FLUX Pro", type: "image", params: ["n", "size"] }, + { id: "flux-dev", name: "FLUX Dev", type: "image", params: ["n", "size"] }, + { id: "flux-kontext-pro", name: "FLUX Kontext Pro (Edit)", type: "image", params: ["size"], capabilities: ["edit"] }, + { id: "flux-kontext-max", name: "FLUX Kontext Max (Edit)", type: "image", params: ["size"], capabilities: ["edit"] }, + ], + recraft: [ + { id: "recraftv3", name: "Recraft V3", type: "image", params: ["n", "size", "style"] }, + { id: "recraftv2", name: "Recraft V2", type: "image", params: ["n", "size", "style"] }, + ], + runwayml: [ + { id: "gen4_image", name: "Gen-4 Image", type: "image", params: ["size"] }, + { id: "gen4_image_turbo", name: "Gen-4 Image Turbo", type: "image", params: ["size"] }, + { id: "gen4_turbo", name: "Gen-4 Turbo", type: "video", params: [] }, + { id: "gen3a_turbo", name: "Gen-3 Alpha Turbo", type: "video", params: [] }, + ], }; // Helper functions diff --git a/open-sse/handlers/embeddingProviders/_base.js b/open-sse/handlers/embeddingProviders/_base.js new file mode 100644 index 00000000..2a8f3998 --- /dev/null +++ b/open-sse/handlers/embeddingProviders/_base.js @@ -0,0 +1,4 @@ +// Shared embedding helpers +export function bearerAuth(creds) { + return { "Authorization": `Bearer ${creds.apiKey || creds.accessToken}` }; +} diff --git a/open-sse/handlers/embeddingProviders/gemini.js b/open-sse/handlers/embeddingProviders/gemini.js new file mode 100644 index 00000000..1a61954c --- 
/dev/null +++ b/open-sse/handlers/embeddingProviders/gemini.js @@ -0,0 +1,42 @@ +// Google Gemini embeddings — embedContent / batchEmbedContents +const BASE = "https://generativelanguage.googleapis.com/v1beta"; + +function modelPath(model) { + return model.startsWith("models/") ? model : `models/${model}`; +} + +export default { + buildUrl: (model, creds, { input } = {}) => { + const apiKey = creds.apiKey || creds.accessToken; + const path = modelPath(model); + const op = Array.isArray(input) ? "batchEmbedContents" : "embedContent"; + return `${BASE}/${path}:${op}?key=${encodeURIComponent(apiKey)}`; + }, + buildHeaders: () => ({ "Content-Type": "application/json" }), + buildBody: (model, { input }) => { + const m = modelPath(model); + if (Array.isArray(input)) { + return { requests: input.map((text) => ({ model: m, content: { parts: [{ text: String(text) }] } })) }; + } + return { model: m, content: { parts: [{ text: String(input) }] } }; + }, + normalize: (responseBody, model) => { + if (responseBody.object === "list" && Array.isArray(responseBody.data)) return responseBody; + let items = []; + if (Array.isArray(responseBody.embeddings)) { + items = responseBody.embeddings.map((emb, idx) => ({ + object: "embedding", + index: idx, + embedding: emb.values || [], + })); + } else if (responseBody.embedding?.values) { + items = [{ object: "embedding", index: 0, embedding: responseBody.embedding.values }]; + } + return { + object: "list", + data: items, + model, + usage: { prompt_tokens: 0, total_tokens: 0 }, + }; + }, +}; diff --git a/open-sse/handlers/embeddingProviders/index.js b/open-sse/handlers/embeddingProviders/index.js new file mode 100644 index 00000000..bbc7c6f5 --- /dev/null +++ b/open-sse/handlers/embeddingProviders/index.js @@ -0,0 +1,23 @@ +// Embeddings provider adapter registry +import createOpenAIEmbeddingAdapter from "./openai.js"; +import gemini from "./gemini.js"; +import openaiCompatNode from "./openaiCompatNode.js"; + +const OPENAI_COMPAT_PROVIDERS = [ + "openai", "openrouter", "mistral", "voyage-ai", "fireworks", + "together", "nebius", "github", "nvidia", "jina-ai", +]; + +const ADAPTERS = { + ...Object.fromEntries(OPENAI_COMPAT_PROVIDERS.map((id) => [id, createOpenAIEmbeddingAdapter(id)])), + gemini, + google_ai_studio: gemini, +}; + +export function getEmbeddingAdapter(provider) { + if (ADAPTERS[provider]) return ADAPTERS[provider]; + if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) { + return openaiCompatNode; + } + return null; +} diff --git a/open-sse/handlers/embeddingProviders/openai.js b/open-sse/handlers/embeddingProviders/openai.js new file mode 100644 index 00000000..c89cd9d5 --- /dev/null +++ b/open-sse/handlers/embeddingProviders/openai.js @@ -0,0 +1,39 @@ +// OpenAI-compatible embeddings adapter (most providers) +import { bearerAuth } from "./_base.js"; + +const ENDPOINTS = { + openai: "https://api.openai.com/v1/embeddings", + openrouter: "https://openrouter.ai/api/v1/embeddings", + mistral: "https://api.mistral.ai/v1/embeddings", + "voyage-ai": "https://api.voyageai.com/v1/embeddings", + fireworks: "https://api.fireworks.ai/inference/v1/embeddings", + together: "https://api.together.xyz/v1/embeddings", + nebius: "https://api.tokenfactory.nebius.com/v1/embeddings", + github: "https://models.github.ai/inference/embeddings", + nvidia: "https://integrate.api.nvidia.com/v1/embeddings", + "jina-ai": "https://api.jina.ai/v1/embeddings", +}; + +export default function createOpenAIEmbeddingAdapter(providerId) { + 
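// Every adapter returned here follows the contract embeddingsCore drives:
// buildUrl → buildHeaders → buildBody → fetch → normalize. A minimal usage
// sketch, assuming a placeholder key and OpenAI's text-embedding-3-small:
//   const adapter = createOpenAIEmbeddingAdapter("openai");
//   const res = await fetch(adapter.buildUrl(), {
//     method: "POST",
//     headers: adapter.buildHeaders({ apiKey: "<key>" }),
//     body: JSON.stringify(adapter.buildBody("text-embedding-3-small", { input: "hello" })),
//   });
//   const openaiShaped = adapter.normalize(await res.json(), "text-embedding-3-small");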
return { + buildUrl: () => ENDPOINTS[providerId], + buildHeaders: (creds) => { + const headers = { "Content-Type": "application/json", ...bearerAuth(creds) }; + if (providerId === "openrouter") { + headers["HTTP-Referer"] = "https://endpoint-proxy.local"; + headers["X-Title"] = "Endpoint Proxy"; + } + return headers; + }, + buildBody: (model, { input, encoding_format, dimensions }) => { + const body = { model, input }; + if (encoding_format) body.encoding_format = encoding_format; + if (dimensions != null && dimensions !== "") { + const dim = Number(dimensions); + if (Number.isFinite(dim) && dim > 0) body.dimensions = dim; + } + return body; + }, + normalize: (responseBody) => responseBody, + }; +} diff --git a/open-sse/handlers/embeddingProviders/openaiCompatNode.js b/open-sse/handlers/embeddingProviders/openaiCompatNode.js new file mode 100644 index 00000000..6581b457 --- /dev/null +++ b/open-sse/handlers/embeddingProviders/openaiCompatNode.js @@ -0,0 +1,13 @@ +// Custom node providers (openai-compatible-* / custom-embedding-*) — baseUrl from credentials +import createOpenAIEmbeddingAdapter from "./openai.js"; + +const baseAdapter = createOpenAIEmbeddingAdapter("openai"); + +export default { + ...baseAdapter, + buildUrl: (_model, creds) => { + const rawBaseUrl = creds?.providerSpecificData?.baseUrl || "https://api.openai.com/v1"; + const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, ""); + return `${baseUrl}/embeddings`; + }, +}; diff --git a/open-sse/handlers/embeddingsCore.js b/open-sse/handlers/embeddingsCore.js index b210b683..5a4c92ba 100644 --- a/open-sse/handlers/embeddingsCore.js +++ b/open-sse/handlers/embeddingsCore.js @@ -1,196 +1,13 @@ -import { getModelTargetFormat, PROVIDER_ID_TO_ALIAS } from "../config/providerModels.js"; import { createErrorResult, parseUpstreamError, formatProviderError } from "../utils/error.js"; import { HTTP_STATUS } from "../config/runtimeConfig.js"; import { getExecutor } from "../executors/index.js"; import { refreshWithRetry } from "../services/tokenRefresh.js"; - -// Google AI (Gemini) provider aliases / identifiers -const GEMINI_PROVIDERS = new Set(["gemini", "google_ai_studio"]); - -// Static map: provider id → embeddings endpoint (OpenAI-compatible body format) -const EMBEDDING_URLS = { - openai: "https://api.openai.com/v1/embeddings", - openrouter: "https://openrouter.ai/api/v1/embeddings", - mistral: "https://api.mistral.ai/v1/embeddings", - "voyage-ai": "https://api.voyageai.com/v1/embeddings", - fireworks: "https://api.fireworks.ai/inference/v1/embeddings", - together: "https://api.together.xyz/v1/embeddings", - nebius: "https://api.tokenfactory.nebius.com/v1/embeddings", - github: "https://models.github.ai/inference/embeddings", - nvidia: "https://integrate.api.nvidia.com/v1/embeddings", -}; +import { getEmbeddingAdapter } from "./embeddingProviders/index.js"; /** - * Check whether a provider targets the Google AI (Gemini) embeddings API. - * @param {string} provider - */ -function isGeminiProvider(provider) { - return GEMINI_PROVIDERS.has(provider); -} - -/** - * Build the embeddings request body for the target provider. + * Core embeddings handler — orchestrator only. Provider-specific URL/headers/body/normalize + * live in `./embeddingProviders/{id}.js`. * - * - OpenAI / openai-compatible / openrouter: standard { model, input } format. - * - Google AI (Gemini): different format per API spec. 
- * - Single input → embedContent body: { model, content: { parts: [{ text }] } } - * - Batch input → batchEmbedContents body: { requests: [{ model, content: { parts: [{ text }] } }] } - */ -function buildEmbeddingsBody(provider, model, input, encodingFormat, dimensions) { - if (isGeminiProvider(provider)) { - // Normalize model name: Gemini API expects "models/" prefix - const geminiModel = model.startsWith("models/") ? model : `models/${model}`; - - if (Array.isArray(input)) { - // Batch request - return { - requests: input.map((text) => ({ - model: geminiModel, - content: { parts: [{ text: String(text) }] } - })) - }; - } else { - // Single request - return { - model: geminiModel, - content: { parts: [{ text: String(input) }] } - }; - } - } - - // Default: OpenAI format - const body = { model, input }; - if (encodingFormat) { - body.encoding_format = encodingFormat; - } - if (dimensions != null && dimensions !== "") { - const dim = Number(dimensions); - if (Number.isFinite(dim) && dim > 0) body.dimensions = dim; - } - return body; -} - -/** - * Build the URL for the embeddings endpoint based on the provider. - * @param {string} provider - * @param {string} model - * @param {object} credentials - * @param {string|string[]} input - used to select single vs batch endpoint for Gemini - */ -function buildEmbeddingsUrl(provider, model, credentials, input) { - if (isGeminiProvider(provider)) { - const apiKey = credentials.apiKey || credentials.accessToken; - // Normalize model name for URL path - const modelPath = model.startsWith("models/") ? model : `models/${model}`; - - if (Array.isArray(input)) { - // batchEmbedContents for array input (keeps response format consistent even for length=1) - return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:batchEmbedContents?key=${encodeURIComponent(apiKey)}`; - } - return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:embedContent?key=${encodeURIComponent(apiKey)}`; - } - - if (EMBEDDING_URLS[provider]) return EMBEDDING_URLS[provider]; - - // openai-compatible & custom-embedding providers: use their baseUrl + /embeddings - if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) { - const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1"; - // Defensive: strip trailing slash and accidental /embeddings to avoid double-append - const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, ""); - return `${baseUrl}/embeddings`; - } - return null; -} - -/** - * Build headers for the embeddings request. - */ -function buildEmbeddingsHeaders(provider, credentials) { - const headers = { "Content-Type": "application/json" }; - - if (isGeminiProvider(provider)) { - // Gemini API uses API key as query param — no Authorization header needed - return headers; - } - - switch (provider) { - case "openai": - case "openrouter": - headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`; - if (provider === "openrouter") { - headers["HTTP-Referer"] = "https://endpoint-proxy.local"; - headers["X-Title"] = "Endpoint Proxy"; - } - break; - default: - headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`; - } - - return headers; -} - -/** - * Normalize the embeddings response to OpenAI format. - * - * Gemini single response: - * { embedding: { values: [0.1, 0.2, ...] } } - * - * Gemini batch response: - * { embeddings: [{ values: [...] }, ...] 
} - * - * Target OpenAI format: - * { object: "list", data: [{ object: "embedding", index: 0, embedding: [...] }], model, usage: {...} } - */ -function normalizeEmbeddingsResponse(responseBody, model, provider) { - // Already in OpenAI format - if (responseBody.object === "list" && Array.isArray(responseBody.data)) { - return responseBody; - } - - if (isGeminiProvider(provider)) { - let embeddingItems = []; - - if (Array.isArray(responseBody.embeddings)) { - // Batch response - embeddingItems = responseBody.embeddings.map((emb, idx) => ({ - object: "embedding", - index: idx, - embedding: emb.values || [] - })); - } else if (responseBody.embedding?.values) { - // Single response - embeddingItems = [{ - object: "embedding", - index: 0, - embedding: responseBody.embedding.values - }]; - } - - return { - object: "list", - data: embeddingItems, - model, - usage: { - prompt_tokens: 0, - total_tokens: 0 - } - }; - } - - // Try to handle alternate formats gracefully - return responseBody; -} - -/** - * Core embeddings handler — shared between Worker and SSE server. - * - * @param {object} options - * @param {object} options.body - Parsed request body { model, input, encoding_format } - * @param {object} options.modelInfo - { provider, model } - * @param {object} options.credentials - Provider credentials - * @param {object} [options.log] - Logger - * @param {function} [options.onCredentialsRefreshed] - Called when creds are refreshed - * @param {function} [options.onRequestSuccess] - Called on success (clear error state) * @returns {Promise<{ success: boolean, response: Response, status?: number, error?: string }>} */ export async function handleEmbeddingsCore({ @@ -199,7 +16,7 @@ export async function handleEmbeddingsCore({ credentials, log, onCredentialsRefreshed, - onRequestSuccess + onRequestSuccess, }) { const { provider, model } = modelInfo; @@ -212,19 +29,22 @@ export async function handleEmbeddingsCore({ return createErrorResult(HTTP_STATUS.BAD_REQUEST, "input must be a string or array of strings"); } - const encodingFormat = body.encoding_format || "float"; - - // Determine embeddings URL - const url = buildEmbeddingsUrl(provider, model, credentials, input); - if (!url) { + const adapter = getEmbeddingAdapter(provider); + if (!adapter) { return createErrorResult( HTTP_STATUS.BAD_REQUEST, - `Provider '${provider}' does not support embeddings. Use openai, openrouter, gemini, or an openai-compatible provider.` + `Provider '${provider}' does not support embeddings.` ); } - const headers = buildEmbeddingsHeaders(provider, credentials); - const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat, body.dimensions); + const ctx = { input }; + const url = adapter.buildUrl(model, credentials, ctx); + const headers = adapter.buildHeaders(credentials, ctx); + const requestBody = adapter.buildBody(model, { + input, + encoding_format: body.encoding_format || "float", + dimensions: body.dimensions, + }); log?.debug?.("EMBEDDINGS", `${provider.toUpperCase()} | ${model} | input_type=${Array.isArray(input) ? 
`array[${input.length}]` : "string"}`); @@ -233,7 +53,7 @@ export async function handleEmbeddingsCore({ providerResponse = await fetch(url, { method: "POST", headers, - body: JSON.stringify(requestBody) + body: JSON.stringify(requestBody), }); } catch (error) { const errMsg = formatProviderError(error, provider, model, HTTP_STATUS.BAD_GATEWAY); @@ -244,9 +64,9 @@ export async function handleEmbeddingsCore({ // Handle 401/403 — try token refresh (skip for noAuth providers) const executor = getExecutor(provider); if ( - !executor.noAuth && + !executor?.noAuth && (providerResponse.status === HTTP_STATUS.UNAUTHORIZED || - providerResponse.status === HTTP_STATUS.FORBIDDEN) + providerResponse.status === HTTP_STATUS.FORBIDDEN) ) { const newCredentials = await refreshWithRetry( () => executor.refreshCredentials(credentials, log), @@ -257,24 +77,17 @@ export async function handleEmbeddingsCore({ if (newCredentials?.accessToken || newCredentials?.apiKey) { log?.info?.("TOKEN", `${provider.toUpperCase()} | refreshed for embeddings`); Object.assign(credentials, newCredentials); - if (onCredentialsRefreshed && newCredentials) { - await onCredentialsRefreshed(newCredentials); - } + if (onCredentialsRefreshed) await onCredentialsRefreshed(newCredentials); - // Retry with refreshed credentials try { - const retryHeaders = buildEmbeddingsHeaders(provider, credentials); - // Rebuild URL for Gemini since API key is embedded in query param - const retryUrl = isGeminiProvider(provider) - ? buildEmbeddingsUrl(provider, model, credentials, input) - : url; - + const retryHeaders = adapter.buildHeaders(credentials, ctx); + const retryUrl = adapter.buildUrl(model, credentials, ctx); providerResponse = await fetch(retryUrl, { method: "POST", headers: retryHeaders, - body: JSON.stringify(requestBody) + body: JSON.stringify(requestBody), }); - } catch (retryError) { + } catch { log?.warn?.("TOKEN", `${provider.toUpperCase()} | retry after refresh failed`); } } else { @@ -292,16 +105,13 @@ export async function handleEmbeddingsCore({ let responseBody; try { responseBody = await providerResponse.json(); - } catch (parseError) { + } catch { return createErrorResult(HTTP_STATUS.BAD_GATEWAY, `Invalid JSON response from ${provider}`); } - if (onRequestSuccess) { - await onRequestSuccess(); - } - - const normalized = normalizeEmbeddingsResponse(responseBody, model, provider); + if (onRequestSuccess) await onRequestSuccess(); + const normalized = adapter.normalize(responseBody, model); log?.debug?.("EMBEDDINGS", `Success | usage=${JSON.stringify(normalized.usage || {})}`); return { @@ -309,8 +119,8 @@ export async function handleEmbeddingsCore({ response: new Response(JSON.stringify(normalized), { headers: { "Content-Type": "application/json", - "Access-Control-Allow-Origin": "*" - } - }) + "Access-Control-Allow-Origin": "*", + }, + }), }; } diff --git a/open-sse/handlers/imageGenerationCore.js b/open-sse/handlers/imageGenerationCore.js index 1d5d12d5..3027f282 100644 --- a/open-sse/handlers/imageGenerationCore.js +++ b/open-sse/handlers/imageGenerationCore.js @@ -1,406 +1,23 @@ -import { randomUUID } from "node:crypto"; import { createErrorResult, parseUpstreamError, formatProviderError } from "../utils/error.js"; import { HTTP_STATUS } from "../config/runtimeConfig.js"; import { refreshWithRetry } from "../services/tokenRefresh.js"; import { getExecutor } from "../executors/index.js"; - -const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses"; -const CODEX_USER_AGENT = "codex-imagen/0.2.6"; -const 
CODEX_VERSION = "0.122.0"; -const CODEX_ORIGINATOR = "codex_cli_rs"; -const CODEX_MODEL_SUFFIX = "-image"; -const CODEX_REF_DETAIL = "high"; - -// Image provider configurations -const IMAGE_PROVIDERS = { - openai: { - baseUrl: "https://api.openai.com/v1/images/generations", - format: "openai", - }, - gemini: { - baseUrl: "https://generativelanguage.googleapis.com/v1beta/models", - format: "gemini", - }, - minimax: { - baseUrl: "https://api.minimaxi.com/v1/images/generations", - format: "openai", - }, - openrouter: { - baseUrl: "https://openrouter.ai/api/v1/images/generations", - format: "openai", - }, - nanobanana: { - baseUrl: "https://api.nanobananaapi.ai/api/v1/nanobanana/generate", - format: "nanobanana", - }, - sdwebui: { - baseUrl: "http://localhost:7860/sdapi/v1/txt2img", - format: "sdwebui", - }, - comfyui: { - baseUrl: "http://localhost:8188", - format: "comfyui", - }, - huggingface: { - baseUrl: "https://api-inference.huggingface.co/models", - format: "huggingface", - }, - codex: { - baseUrl: CODEX_RESPONSES_URL, - format: "codex", - stream: true, - }, -}; - -// Decode codex chatgpt account id from idToken if not stored -function decodeCodexAccountId(idToken) { - try { - const parts = String(idToken || "").split("."); - if (parts.length !== 3) return null; - const b64 = parts[1].replace(/-/g, "+").replace(/_/g, "/"); - const pad = (4 - (b64.length % 4)) % 4; - const payload = JSON.parse(Buffer.from(b64 + "=".repeat(pad), "base64").toString("utf8")); - return payload?.["https://api.openai.com/auth"]?.chatgpt_account_id || null; - } catch { - return null; - } -} - -// Strip "-image" suffix to get the underlying chat model -function stripCodexImageModel(model) { - return model.endsWith(CODEX_MODEL_SUFFIX) - ? model.slice(0, -CODEX_MODEL_SUFFIX.length) - : model; -} - -// Normalize a single ref image input to a data URL -function toCodexDataUrl(input) { - if (!input) return null; - if (typeof input !== "string") return null; - if (/^data:image\//i.test(input) || /^https?:\/\//i.test(input)) return input; - // assume raw base64 PNG - return `data:image/png;base64,${input}`; -} - -// Build content array with optional reference images, mirroring codex-imagen tagging -function buildCodexContent(prompt, refs, detail = CODEX_REF_DETAIL) { - const content = []; - refs.forEach((url, index) => { - content.push({ type: "input_text", text: `` }); - content.push({ type: "input_image", image_url: url, detail }); - content.push({ type: "input_text", text: "" }); - }); - content.push({ type: "input_text", text: prompt }); - return content; -} - -// Parse Codex SSE stream, log progress, return final base64 image. -// Optional callbacks let caller forward events to client (SSE pipe). 
-async function parseCodexImageStream(response, log, callbacks = {}) { - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ""; - let imageB64 = null; - let lastEvent = null; - let bytesReceived = 0; - let lastProgressLogMs = 0; - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - bytesReceived += value?.byteLength || 0; - buffer += decoder.decode(value, { stream: true }); - - // SSE events separated by blank line - let sepIdx; - while ((sepIdx = buffer.indexOf("\n\n")) !== -1) { - const block = buffer.slice(0, sepIdx); - buffer = buffer.slice(sepIdx + 2); - - const lines = block.split("\n"); - let eventName = null; - let dataStr = ""; - for (const line of lines) { - if (line.startsWith("event:")) eventName = line.slice(6).trim(); - else if (line.startsWith("data:")) dataStr += line.slice(5).trim(); - } - if (!eventName) continue; - if (eventName !== lastEvent) { - log?.info?.("IMAGE", `codex progress: ${eventName}`); - lastEvent = eventName; - } - - // Notify caller about progress (throttled to ~5/s to avoid flooding) - const now = Date.now(); - if (callbacks.onProgress && now - lastProgressLogMs > 200) { - lastProgressLogMs = now; - callbacks.onProgress({ stage: eventName, bytesReceived }); - } - - if (eventName === "response.image_generation_call.partial_image" && dataStr) { - try { - const data = JSON.parse(dataStr); - if (callbacks.onPartialImage && data?.partial_image_b64) { - callbacks.onPartialImage({ b64_json: data.partial_image_b64, index: data.partial_image_index }); - } - } catch {} - } - - if (eventName === "response.output_item.done" && dataStr) { - try { - const data = JSON.parse(dataStr); - const item = data?.item; - if (item?.type === "image_generation_call" && item.result) { - imageB64 = item.result; - } - } catch {} - } - } - } - return imageB64; -} - -// Build SSE Response that pipes codex progress + partial + done events to client -function buildCodexSseResponse(providerResponse, log, onSuccess) { - const stream = new ReadableStream({ - async start(controller) { - const enc = new TextEncoder(); - const send = (event, data) => { - controller.enqueue(enc.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`)); - }; - try { - const b64 = await parseCodexImageStream(providerResponse, log, { - onProgress: (info) => send("progress", info), - onPartialImage: (info) => send("partial_image", info), - }); - if (!b64) { - send("error", { message: "Codex did not return an image. Account may not be entitled (Plus/Pro required)." 
}); - } else { - if (onSuccess) await onSuccess(); - send("done", { - created: Math.floor(Date.now() / 1000), - data: [{ b64_json: b64 }], - }); - } - } catch (err) { - send("error", { message: err?.message || "Stream failed" }); - } finally { - controller.close(); - } - }, - }); - return new Response(stream, { - headers: { - "Content-Type": "text/event-stream", - "Cache-Control": "no-cache, no-transform", - "Connection": "keep-alive", - "X-Accel-Buffering": "no", - "Access-Control-Allow-Origin": "*", - }, - }); -} +import { getImageAdapter } from "./imageProviders/index.js"; +import { urlToBase64 } from "./imageProviders/_base.js"; /** - * Build image generation URL - */ -function buildImageUrl(provider, model, credentials) { - const config = IMAGE_PROVIDERS[provider]; - if (!config) return null; - - switch (provider) { - case "gemini": { - const apiKey = credentials?.apiKey || credentials?.accessToken; - const modelId = model.replace(/^models\//, ""); - return `${config.baseUrl}/${modelId}:generateContent?key=${encodeURIComponent(apiKey)}`; - } - case "huggingface": - return `${config.baseUrl}/${model}`; - case "codex": - return CODEX_RESPONSES_URL; - default: - return config.baseUrl; - } -} - -/** - * Build request headers - */ -function buildImageHeaders(provider, credentials) { - const headers = { "Content-Type": "application/json" }; - - if (provider === "gemini") { - return headers; - } - - if (provider === "codex") { - const accountId = - credentials?.providerSpecificData?.chatgptAccountId || - decodeCodexAccountId(credentials?.idToken); - return { - "accept": "text/event-stream, application/json", - "authorization": `Bearer ${credentials?.accessToken || ""}`, - "chatgpt-account-id": accountId || "", - "content-type": "application/json", - "originator": CODEX_ORIGINATOR, - "session_id": randomUUID(), - "user-agent": CODEX_USER_AGENT, - "version": CODEX_VERSION, - "x-client-request-id": randomUUID(), - }; - } - - if (provider === "openrouter") { - headers["Authorization"] = `Bearer ${credentials?.apiKey || credentials?.accessToken}`; - headers["HTTP-Referer"] = "https://endpoint-proxy.local"; - headers["X-Title"] = "Endpoint Proxy"; - return headers; - } - - if (provider === "huggingface") { - headers["Authorization"] = `Bearer ${credentials?.apiKey || credentials?.accessToken}`; - return headers; - } - - if (credentials?.apiKey || credentials?.accessToken) { - headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`; - } - - return headers; -} - -/** - * Build request body based on provider format - */ -function buildImageBody(provider, model, body) { - const { prompt, n = 1, size = "1024x1024", quality, style, response_format, image, images } = body; - - switch (provider) { - case "codex": { - const refs = []; - if (Array.isArray(images)) images.forEach((i) => { const u = toCodexDataUrl(i); if (u) refs.push(u); }); - const single = toCodexDataUrl(image); - if (single) refs.push(single); - const detail = body.image_detail || CODEX_REF_DETAIL; - const imgTool = { type: "image_generation", output_format: (body.output_format || "png").toLowerCase() }; - if (body.size && body.size !== "") imgTool.size = body.size; - if (body.quality && body.quality !== "") imgTool.quality = body.quality; - if (body.background && body.background !== "") imgTool.background = body.background; - return { - model: stripCodexImageModel(model), - instructions: "", - input: [{ type: "message", role: "user", content: buildCodexContent(prompt, refs, detail) }], - tools: [imgTool], - 
tool_choice: "auto", - parallel_tool_calls: false, - prompt_cache_key: randomUUID(), - stream: true, - store: false, - reasoning: null, - }; - } - - case "gemini": - return { - contents: [{ parts: [{ text: prompt }] }], - generationConfig: { - responseModalities: ["TEXT", "IMAGE"], - }, - }; - - case "sdwebui": { - const [width, height] = size.split("x").map(Number); - return { - prompt, - width: width || 512, - height: height || 512, - steps: 20, - batch_size: n, - }; - } - - case "nanobanana": { - const sizeMap = { - "1024x1024": "1:1", - "1024x1792": "9:16", - "1792x1024": "16:9", - }; - return { - prompt, - type: "TEXTTOIAMGE", - numImages: n, - image_size: sizeMap[size] || "1:1", - }; - } - - default: - // OpenAI-compatible format - const requestBody = { model, prompt, n, size }; - if (quality) requestBody.quality = quality; - if (style) requestBody.style = style; - if (response_format) requestBody.response_format = response_format; - return requestBody; - } -} - -/** - * Normalize response to OpenAI format - */ -function normalizeImageResponse(responseBody, provider, prompt) { - // Already in OpenAI format - if (responseBody.created && Array.isArray(responseBody.data)) { - return responseBody; - } - - const timestamp = Math.floor(Date.now() / 1000); - - switch (provider) { - case "gemini": { - const parts = responseBody.candidates?.[0]?.content?.parts || []; - const images = parts - .filter((p) => p.inlineData?.data) - .map((p) => ({ b64_json: p.inlineData.data })); - return { - created: timestamp, - data: images.length > 0 ? images : [{ b64_json: "", revised_prompt: prompt }], - }; - } - - case "sdwebui": { - const images = Array.isArray(responseBody.images) - ? responseBody.images.map((img) => ({ b64_json: img })) - : []; - return { created: timestamp, data: images }; - } - - case "nanobanana": { - if (responseBody.image) { - return { - created: timestamp, - data: [{ b64_json: responseBody.image, revised_prompt: prompt }], - }; - } - return { created: timestamp, data: [] }; - } - - case "huggingface": { - // HuggingFace returns binary image data - return responseBody; - } - - default: - return responseBody; - } -} - -/** - * Core image generation handler + * Core image generation handler — orchestrator only. + * Provider-specific URL/headers/body/parse/normalize live in `./imageProviders/{id}.js`. + * * @param {object} options * @param {object} options.body - Request body { model, prompt, n, size, ... 
} * @param {object} options.modelInfo - { provider, model } * @param {object} options.credentials - Provider credentials * @param {object} [options.log] - Logger - * @param {function} [options.onCredentialsRefreshed] - Called when creds are refreshed - * @param {function} [options.onRequestSuccess] - Called on success + * @param {boolean} [options.streamToClient] - Pipe SSE to client (codex) + * @param {boolean} [options.binaryOutput] - Return raw image bytes + * @param {function} [options.onCredentialsRefreshed] + * @param {function} [options.onRequestSuccess] * @returns {Promise<{ success: boolean, response: Response, status?: number, error?: string }>} */ export async function handleImageGenerationCore({ @@ -419,16 +36,17 @@ export async function handleImageGenerationCore({ return createErrorResult(HTTP_STATUS.BAD_REQUEST, "Missing required field: prompt"); } - const url = buildImageUrl(provider, model, credentials); - if (!url) { + const adapter = getImageAdapter(provider); + if (!adapter) { return createErrorResult( HTTP_STATUS.BAD_REQUEST, `Provider '${provider}' does not support image generation` ); } - const headers = buildImageHeaders(provider, credentials); - const requestBody = buildImageBody(provider, model, body); + const url = adapter.buildUrl(model, credentials); + const headers = adapter.buildHeaders(credentials); + const requestBody = adapter.buildBody(model, body); log?.debug?.("IMAGE", `${provider.toUpperCase()} | ${model} | prompt="${body.prompt.slice(0, 50)}..."`); @@ -445,10 +63,11 @@ export async function handleImageGenerationCore({ return createErrorResult(HTTP_STATUS.BAD_GATEWAY, errMsg); } - // Handle 401/403 — try token refresh + // Handle 401/403 — try token refresh (skipped for noAuth providers) const executor = getExecutor(provider); if ( !executor?.noAuth && + !adapter.noAuth && (providerResponse.status === HTTP_STATUS.UNAUTHORIZED || providerResponse.status === HTTP_STATUS.FORBIDDEN) ) { @@ -461,20 +80,17 @@ export async function handleImageGenerationCore({ if (newCredentials?.accessToken || newCredentials?.apiKey) { log?.info?.("TOKEN", `${provider.toUpperCase()} | refreshed for image generation`); Object.assign(credentials, newCredentials); - if (onCredentialsRefreshed && newCredentials) { - await onCredentialsRefreshed(newCredentials); - } + if (onCredentialsRefreshed) await onCredentialsRefreshed(newCredentials); try { - const retryHeaders = buildImageHeaders(provider, credentials); - const retryUrl = provider === "gemini" ? 
buildImageUrl(provider, model, credentials) : url; - + const retryHeaders = adapter.buildHeaders(credentials); + const retryUrl = adapter.buildUrl(model, credentials); providerResponse = await fetch(retryUrl, { method: "POST", headers: retryHeaders, body: JSON.stringify(requestBody), }); - } catch (retryError) { + } catch { log?.warn?.("TOKEN", `${provider.toUpperCase()} | retry after refresh failed`); } } else { @@ -489,51 +105,42 @@ export async function handleImageGenerationCore({ return createErrorResult(statusCode, errMsg); } - let responseBody; + // Parse provider response — adapter may override (codex SSE / async polling / binary) + let parsed; try { - if (provider === "huggingface") { - const buffer = await providerResponse.arrayBuffer(); - const base64 = Buffer.from(buffer).toString("base64"); - responseBody = { - created: Math.floor(Date.now() / 1000), - data: [{ b64_json: base64 }], - }; - } else if (provider === "codex") { - // SSE pipe to client (progress + partial_image + done) - if (streamToClient) { - return { - success: true, - response: buildCodexSseResponse(providerResponse, log, onRequestSuccess), - }; + if (adapter.parseResponse) { + parsed = await adapter.parseResponse(providerResponse, { + headers, + log, + streamToClient, + onRequestSuccess, + }); + // Codex streaming case: returns an SSE Response directly + if (parsed?.sseResponse) { + return { success: true, response: parsed.sseResponse }; } - const b64 = await parseCodexImageStream(providerResponse, log); - if (!b64) { - return createErrorResult( - HTTP_STATUS.BAD_GATEWAY, - "Codex did not return an image. Account may not be entitled (Plus/Pro required)." - ); - } - responseBody = { - created: Math.floor(Date.now() / 1000), - data: [{ b64_json: b64 }], - }; } else { - responseBody = await providerResponse.json(); + parsed = await providerResponse.json(); } } catch (parseError) { - return createErrorResult(HTTP_STATUS.BAD_GATEWAY, `Invalid response from ${provider}`); + return createErrorResult(HTTP_STATUS.BAD_GATEWAY, parseError.message || `Invalid response from ${provider}`); } - if (onRequestSuccess) { - await onRequestSuccess(); - } + if (onRequestSuccess) await onRequestSuccess(); - const normalized = normalizeImageResponse(responseBody, provider, body.prompt); + // Normalize → OpenAI-compatible shape + const normalized = adapter.normalize(parsed, body.prompt); - // Binary output: decode first b64_json into raw bytes + // Already in OpenAI shape? skip re-normalize + const finalBody = (normalized.created && Array.isArray(normalized.data)) ? 
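// i.e. keep the adapter's normalized result only when it already matches the
// OpenAI images shape ({ created, data: [...] }); otherwise fall back to the
// raw parsed body so nothing the provider returned is silently dropped.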
normalized : parsed; + + // Binary output: decode first b64_json (or fetch url) into raw bytes if (binaryOutput) { - const first = normalized.data?.[0]; - const b64 = first?.b64_json; + const first = finalBody.data?.[0]; + let b64 = first?.b64_json; + if (!b64 && first?.url) { + try { b64 = await urlToBase64(first.url); } catch {} + } if (b64) { const buf = Buffer.from(b64, "base64"); const fmt = (body.output_format || "png").toLowerCase(); @@ -553,7 +160,7 @@ export async function handleImageGenerationCore({ return { success: true, - response: new Response(JSON.stringify(normalized), { + response: new Response(JSON.stringify(finalBody), { headers: { "Content-Type": "application/json", "Access-Control-Allow-Origin": "*", diff --git a/open-sse/handlers/imageProviders/_base.js b/open-sse/handlers/imageProviders/_base.js new file mode 100644 index 00000000..f8902de2 --- /dev/null +++ b/open-sse/handlers/imageProviders/_base.js @@ -0,0 +1,31 @@ +// Shared helpers for image provider adapters + +export const POLL_INTERVAL_MS = 1500; +export const POLL_TIMEOUT_MS = 120000; + +export const sleep = (ms) => new Promise((r) => setTimeout(r, ms)); + +// Map OpenAI size to provider-specific aspect ratio +export function sizeToAspectRatio(size) { + if (!size || typeof size !== "string") return "1:1"; + const map = { + "1024x1024": "1:1", + "1024x1792": "9:16", + "1792x1024": "16:9", + "1024x1536": "2:3", + "1536x1024": "3:2", + }; + return map[size] || "1:1"; +} + +// Fetch URL → base64 (for providers returning image URLs) +export async function urlToBase64(url) { + const res = await fetch(url); + if (!res.ok) throw new Error(`Failed to fetch image: ${res.status}`); + const buf = await res.arrayBuffer(); + return Buffer.from(buf).toString("base64"); +} + +export function nowSec() { + return Math.floor(Date.now() / 1000); +} diff --git a/open-sse/handlers/imageProviders/blackForestLabs.js b/open-sse/handlers/imageProviders/blackForestLabs.js new file mode 100644 index 00000000..c1e73675 --- /dev/null +++ b/open-sse/handlers/imageProviders/blackForestLabs.js @@ -0,0 +1,43 @@ +// Black Forest Labs (FLUX) — async submit + polling_url +import { sleep, nowSec, POLL_INTERVAL_MS, POLL_TIMEOUT_MS } from "./_base.js"; + +const BASE_URL = "https://api.bfl.ai/v1"; + +export default { + async: true, + buildUrl: (model) => `${BASE_URL}/${model}`, + buildHeaders: (creds) => { + const key = creds?.apiKey || creds?.accessToken; + return { "Content-Type": "application/json", "x-key": key }; + }, + buildBody: (_model, body) => { + const req = { prompt: body.prompt }; + if (body.size) { + const [w, h] = body.size.split("x").map(Number); + if (w) req.width = w; + if (h) req.height = h; + } + if (body.image) req.image_prompt = body.image; + return req; + }, + async parseResponse(response, { headers }) { + const data = await response.json(); + const pollingUrl = data.polling_url; + if (!pollingUrl) throw new Error("BFL: no polling_url returned"); + const deadline = Date.now() + POLL_TIMEOUT_MS; + while (Date.now() < deadline) { + await sleep(POLL_INTERVAL_MS); + const r = await fetch(pollingUrl, { headers: { "x-key": headers["x-key"], "Accept": "application/json" } }); + if (!r.ok) throw new Error(`BFL status ${r.status}`); + const s = await r.json(); + if (s.status === "Ready") return s; + if (s.status === "Error" || s.status === "Failed") throw new Error(s.error || "BFL generation failed"); + } + throw new Error("BFL polling timeout"); + }, + normalize: (responseBody) => { + const sample = responseBody.result?.sample; + if 
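// A "Ready" BFL payload carries the signed image URL at result.sample — the
// only field this normalize step relies on (not the full BFL response schema).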
(sample) return { created: nowSec(), data: [{ url: sample }] }; + return { created: nowSec(), data: [] }; + }, +}; diff --git a/open-sse/handlers/imageProviders/codex.js b/open-sse/handlers/imageProviders/codex.js new file mode 100644 index 00000000..776c4ee5 --- /dev/null +++ b/open-sse/handlers/imageProviders/codex.js @@ -0,0 +1,198 @@ +// Codex (ChatGPT Plus/Pro) image generation via Responses API + SSE +import { randomUUID } from "node:crypto"; +import { nowSec } from "./_base.js"; + +const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses"; +const CODEX_USER_AGENT = "codex-imagen/0.2.6"; +const CODEX_VERSION = "0.122.0"; +const CODEX_ORIGINATOR = "codex_cli_rs"; +const CODEX_MODEL_SUFFIX = "-image"; +const CODEX_REF_DETAIL = "high"; + +function decodeAccountId(idToken) { + try { + const parts = String(idToken || "").split("."); + if (parts.length !== 3) return null; + const b64 = parts[1].replace(/-/g, "+").replace(/_/g, "/"); + const pad = (4 - (b64.length % 4)) % 4; + const payload = JSON.parse(Buffer.from(b64 + "=".repeat(pad), "base64").toString("utf8")); + return payload?.["https://api.openai.com/auth"]?.chatgpt_account_id || null; + } catch { + return null; + } +} + +function stripImageSuffix(model) { + return model.endsWith(CODEX_MODEL_SUFFIX) ? model.slice(0, -CODEX_MODEL_SUFFIX.length) : model; +} + +function toDataUrl(input) { + if (!input || typeof input !== "string") return null; + if (/^data:image\//i.test(input) || /^https?:\/\//i.test(input)) return input; + return `data:image/png;base64,${input}`; +} + +function buildContent(prompt, refs, detail = CODEX_REF_DETAIL) { + const content = []; + refs.forEach((url, index) => { + content.push({ type: "input_text", text: `` }); + content.push({ type: "input_image", image_url: url, detail }); + content.push({ type: "input_text", text: "" }); + }); + content.push({ type: "input_text", text: prompt }); + return content; +} + +// Parse Codex SSE stream → final base64 image. Optional callbacks for client streaming. 
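// Each SSE block below is a blank-line-separated group of `event:`/`data:` lines.
// The two payload shapes this parser consumes (field names taken from the
// handlers below; the upstream schema is assumed, not documented here):
//   event: response.image_generation_call.partial_image
//   data: {"partial_image_index":0,"partial_image_b64":"<base64 png>"}
//   event: response.output_item.done
//   data: {"item":{"type":"image_generation_call","result":"<base64 png>"}}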
+async function parseStream(response, log, callbacks = {}) { + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + let imageB64 = null; + let lastEvent = null; + let bytesReceived = 0; + let lastProgressLogMs = 0; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + bytesReceived += value?.byteLength || 0; + buffer += decoder.decode(value, { stream: true }); + + let sepIdx; + while ((sepIdx = buffer.indexOf("\n\n")) !== -1) { + const block = buffer.slice(0, sepIdx); + buffer = buffer.slice(sepIdx + 2); + + const lines = block.split("\n"); + let eventName = null; + let dataStr = ""; + for (const line of lines) { + if (line.startsWith("event:")) eventName = line.slice(6).trim(); + else if (line.startsWith("data:")) dataStr += line.slice(5).trim(); + } + if (!eventName) continue; + if (eventName !== lastEvent) { + log?.info?.("IMAGE", `codex progress: ${eventName}`); + lastEvent = eventName; + } + + const now = Date.now(); + if (callbacks.onProgress && now - lastProgressLogMs > 200) { + lastProgressLogMs = now; + callbacks.onProgress({ stage: eventName, bytesReceived }); + } + + if (eventName === "response.image_generation_call.partial_image" && dataStr) { + try { + const data = JSON.parse(dataStr); + if (callbacks.onPartialImage && data?.partial_image_b64) { + callbacks.onPartialImage({ b64_json: data.partial_image_b64, index: data.partial_image_index }); + } + } catch {} + } + + if (eventName === "response.output_item.done" && dataStr) { + try { + const data = JSON.parse(dataStr); + const item = data?.item; + if (item?.type === "image_generation_call" && item.result) { + imageB64 = item.result; + } + } catch {} + } + } + } + return imageB64; +} + +// SSE Response that pipes codex progress + partial + done events to client +function buildSseResponse(providerResponse, log, onSuccess) { + const stream = new ReadableStream({ + async start(controller) { + const enc = new TextEncoder(); + const send = (event, data) => { + controller.enqueue(enc.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`)); + }; + try { + const b64 = await parseStream(providerResponse, log, { + onProgress: (info) => send("progress", info), + onPartialImage: (info) => send("partial_image", info), + }); + if (!b64) { + send("error", { message: "Codex did not return an image. Account may not be entitled (Plus/Pro required)." 
}); + } else { + if (onSuccess) await onSuccess(); + send("done", { created: nowSec(), data: [{ b64_json: b64 }] }); + } + } catch (err) { + send("error", { message: err?.message || "Stream failed" }); + } finally { + controller.close(); + } + }, + }); + return new Response(stream, { + headers: { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache, no-transform", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + "Access-Control-Allow-Origin": "*", + }, + }); +} + +export default { + stream: true, + buildUrl: () => CODEX_RESPONSES_URL, + buildHeaders: (creds) => { + const accountId = creds?.providerSpecificData?.chatgptAccountId || decodeAccountId(creds?.idToken); + return { + "accept": "text/event-stream, application/json", + "authorization": `Bearer ${creds?.accessToken || ""}`, + "chatgpt-account-id": accountId || "", + "content-type": "application/json", + "originator": CODEX_ORIGINATOR, + "session_id": randomUUID(), + "user-agent": CODEX_USER_AGENT, + "version": CODEX_VERSION, + "x-client-request-id": randomUUID(), + }; + }, + buildBody: (model, body) => { + const refs = []; + if (Array.isArray(body.images)) body.images.forEach((i) => { const u = toDataUrl(i); if (u) refs.push(u); }); + const single = toDataUrl(body.image); + if (single) refs.push(single); + const detail = body.image_detail || CODEX_REF_DETAIL; + const imgTool = { type: "image_generation", output_format: (body.output_format || "png").toLowerCase() }; + if (body.size && body.size !== "") imgTool.size = body.size; + if (body.quality && body.quality !== "") imgTool.quality = body.quality; + if (body.background && body.background !== "") imgTool.background = body.background; + return { + model: stripImageSuffix(model), + instructions: "", + input: [{ type: "message", role: "user", content: buildContent(body.prompt, refs, detail) }], + tools: [imgTool], + tool_choice: "auto", + parallel_tool_calls: false, + prompt_cache_key: randomUUID(), + stream: true, + store: false, + reasoning: null, + }; + }, + // Custom: codex parses SSE → either pipe to client or collect b64 + async parseResponse(response, { log, streamToClient, onRequestSuccess }) { + if (streamToClient) { + return { sseResponse: buildSseResponse(response, log, onRequestSuccess) }; + } + const b64 = await parseStream(response, log); + if (!b64) { + throw new Error("Codex did not return an image. 
Account may not be entitled (Plus/Pro required)."); + } + return { created: nowSec(), data: [{ b64_json: b64 }] }; + }, + normalize: (responseBody) => responseBody, +}; diff --git a/open-sse/handlers/imageProviders/comfyui.js b/open-sse/handlers/imageProviders/comfyui.js new file mode 100644 index 00000000..6a37a44b --- /dev/null +++ b/open-sse/handlers/imageProviders/comfyui.js @@ -0,0 +1,8 @@ +// ComfyUI — local, noAuth (placeholder; full graph workflow not implemented) +export default { + noAuth: true, + buildUrl: () => "http://localhost:8188", + buildHeaders: () => ({ "Content-Type": "application/json" }), + buildBody: (_model, body) => ({ prompt: body.prompt }), + normalize: (responseBody) => responseBody, +}; diff --git a/open-sse/handlers/imageProviders/falAi.js b/open-sse/handlers/imageProviders/falAi.js new file mode 100644 index 00000000..191d9a89 --- /dev/null +++ b/open-sse/handlers/imageProviders/falAi.js @@ -0,0 +1,41 @@ +// Fal.ai — async submit + queue polling +import { sleep, nowSec, sizeToAspectRatio, POLL_INTERVAL_MS, POLL_TIMEOUT_MS } from "./_base.js"; + +const BASE_URL = "https://queue.fal.run"; + +export default { + async: true, + buildUrl: (model) => `${BASE_URL}/${model}`, + buildHeaders: (creds) => { + const key = creds?.apiKey || creds?.accessToken; + return { "Content-Type": "application/json", "Authorization": `Key ${key}` }; + }, + buildBody: (_model, body) => { + const req = { prompt: body.prompt, num_images: body.n || 1 }; + if (body.size) req.image_size = sizeToAspectRatio(body.size); + if (body.image) req.image_url = body.image; + return req; + }, + async parseResponse(response, { headers }) { + const { status_url, response_url } = await response.json(); + const deadline = Date.now() + POLL_TIMEOUT_MS; + while (Date.now() < deadline) { + await sleep(POLL_INTERVAL_MS); + const r = await fetch(status_url, { headers }); + if (!r.ok) throw new Error(`Fal status ${r.status}`); + const s = await r.json(); + if (s.status === "COMPLETED") { + const fr = await fetch(response_url, { headers }); + return await fr.json(); + } + if (s.status === "FAILED") throw new Error(s.error || "Fal generation failed"); + } + throw new Error("Fal polling timeout"); + }, + normalize: (responseBody) => { + const images = Array.isArray(responseBody.images) + ? responseBody.images + : (responseBody.image ? 
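// fal result bodies seen by this adapter either carry `images` (objects with a
// `url`, or bare URL strings) or a single `image` — hence the `img.url || img`
// fallback below. Shape inferred from this code, not from a published spec.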
[responseBody.image] : []); + return { created: nowSec(), data: images.map((img) => ({ url: img.url || img })) }; + }, +}; diff --git a/open-sse/handlers/imageProviders/gemini.js b/open-sse/handlers/imageProviders/gemini.js new file mode 100644 index 00000000..3a52ea95 --- /dev/null +++ b/open-sse/handlers/imageProviders/gemini.js @@ -0,0 +1,25 @@ +// Google Gemini adapter (Nano Banana models) +import { nowSec } from "./_base.js"; + +const BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models"; + +export default { + buildUrl: (model, creds) => { + const apiKey = creds?.apiKey || creds?.accessToken; + const modelId = model.replace(/^models\//, ""); + return `${BASE_URL}/${modelId}:generateContent?key=${encodeURIComponent(apiKey)}`; + }, + buildHeaders: () => ({ "Content-Type": "application/json" }), + buildBody: (_model, body) => ({ + contents: [{ parts: [{ text: body.prompt }] }], + generationConfig: { responseModalities: ["TEXT", "IMAGE"] }, + }), + normalize: (responseBody, prompt) => { + const parts = responseBody.candidates?.[0]?.content?.parts || []; + const images = parts.filter((p) => p.inlineData?.data).map((p) => ({ b64_json: p.inlineData.data })); + return { + created: nowSec(), + data: images.length > 0 ? images : [{ b64_json: "", revised_prompt: prompt }], + }; + }, +}; diff --git a/open-sse/handlers/imageProviders/huggingface.js b/open-sse/handlers/imageProviders/huggingface.js new file mode 100644 index 00000000..9b3a03b3 --- /dev/null +++ b/open-sse/handlers/imageProviders/huggingface.js @@ -0,0 +1,22 @@ +// HuggingFace Inference API — returns binary image +import { nowSec } from "./_base.js"; + +const BASE_URL = "https://api-inference.huggingface.co/models"; + +export default { + buildUrl: (model) => `${BASE_URL}/${model}`, + buildHeaders: (creds) => { + const headers = { "Content-Type": "application/json" }; + const key = creds?.apiKey || creds?.accessToken; + if (key) headers["Authorization"] = `Bearer ${key}`; + return headers; + }, + buildBody: (_model, body) => ({ inputs: body.prompt }), + // HF returns raw image bytes — convert to b64_json + async parseResponse(response) { + const buf = await response.arrayBuffer(); + const base64 = Buffer.from(buf).toString("base64"); + return { created: nowSec(), data: [{ b64_json: base64 }] }; + }, + normalize: (responseBody) => responseBody, +}; diff --git a/open-sse/handlers/imageProviders/index.js b/open-sse/handlers/imageProviders/index.js new file mode 100644 index 00000000..83dcfc73 --- /dev/null +++ b/open-sse/handlers/imageProviders/index.js @@ -0,0 +1,37 @@ +// Image provider adapter registry +import createOpenAIAdapter from "./openai.js"; +import gemini from "./gemini.js"; +import codex from "./codex.js"; +import sdwebui from "./sdwebui.js"; +import comfyui from "./comfyui.js"; +import huggingface from "./huggingface.js"; +import nanobanana from "./nanobanana.js"; +import falAi from "./falAi.js"; +import stabilityAi from "./stabilityAi.js"; +import blackForestLabs from "./blackForestLabs.js"; +import runwayml from "./runwayml.js"; + +const ADAPTERS = { + openai: createOpenAIAdapter("openai"), + minimax: createOpenAIAdapter("minimax"), + openrouter: createOpenAIAdapter("openrouter"), + recraft: createOpenAIAdapter("recraft"), + gemini, + codex, + sdwebui, + comfyui, + huggingface, + nanobanana, + "fal-ai": falAi, + "stability-ai": stabilityAi, + "black-forest-labs": blackForestLabs, + runwayml, +}; + +export function getImageAdapter(provider) { + return ADAPTERS[provider] || null; +} + +export function 
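// Hedged consumer sketch — ids must match the ADAPTERS keys registered above:
//   getImageAdapter("fal-ai")      // → fal queue adapter
//   getImageAdapter("unknown")     // → null (orchestrator answers 400)
//   isImageProvider("runwayml")    // → true (helper declared just below)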
isImageProvider(provider) { + return provider in ADAPTERS; +} diff --git a/open-sse/handlers/imageProviders/nanobanana.js b/open-sse/handlers/imageProviders/nanobanana.js new file mode 100644 index 00000000..6bb18fc1 --- /dev/null +++ b/open-sse/handlers/imageProviders/nanobanana.js @@ -0,0 +1,58 @@ +// NanoBanana API — async submit + poll record-info +import { sleep, nowSec, sizeToAspectRatio, POLL_INTERVAL_MS, POLL_TIMEOUT_MS } from "./_base.js"; + +const SUBMIT_URL = "https://api.nanobananaapi.ai/api/v1/nanobanana/generate"; +const POLL_BASE = "https://api.nanobananaapi.ai/api/v1/nanobanana/record-info"; + +export default { + async: true, + buildUrl: () => SUBMIT_URL, + buildHeaders: (creds) => { + const headers = { "Content-Type": "application/json" }; + const key = creds?.apiKey || creds?.accessToken; + if (key) headers["Authorization"] = `Bearer ${key}`; + return headers; + }, + buildBody: (_model, body) => { + const ratio = sizeToAspectRatio(body.size); + const isEdit = !!(body.image || (Array.isArray(body.images) && body.images.length)); + const req = { + prompt: body.prompt, + type: isEdit ? "IMAGETOIAMGE" : "TEXTTOIAMGE", + numImages: body.n || 1, + image_size: ratio, + // API requires callBackUrl; we poll instead so a dummy URL is fine. + callBackUrl: "https://localhost/callback", + }; + if (isEdit) { + const urls = Array.isArray(body.images) ? body.images.filter(Boolean) : []; + if (body.image) urls.push(body.image); + req.imageUrls = urls; + } + return req; + }, + // Async: parse submit → poll until SUCCESS, return raw poll data + async parseResponse(response, { headers }) { + const submitData = await response.json(); + if (submitData.code !== 200) throw new Error(submitData.msg || "NanoBanana submit failed"); + const taskId = submitData.data?.taskId; + if (!taskId) throw new Error("NanoBanana: no taskId returned"); + const pollUrl = `${POLL_BASE}?taskId=${encodeURIComponent(taskId)}`; + const deadline = Date.now() + POLL_TIMEOUT_MS; + while (Date.now() < deadline) { + await sleep(POLL_INTERVAL_MS); + const r = await fetch(pollUrl, { headers }); + if (!r.ok) throw new Error(`NanoBanana status ${r.status}`); + const s = await r.json(); + const flag = s.data?.successFlag; + if (flag === 1) return s.data; + if (flag === 2 || flag === 3) throw new Error(s.data?.errorMessage || "NanoBanana generation failed"); + } + throw new Error("NanoBanana polling timeout"); + }, + normalize: (responseBody, prompt) => { + const url = responseBody.response?.resultImageUrl || responseBody.response?.originImageUrl; + if (url) return { created: nowSec(), data: [{ url, revised_prompt: prompt }] }; + return { created: nowSec(), data: [] }; + }, +}; diff --git a/open-sse/handlers/imageProviders/openai.js b/open-sse/handlers/imageProviders/openai.js new file mode 100644 index 00000000..2e034071 --- /dev/null +++ b/open-sse/handlers/imageProviders/openai.js @@ -0,0 +1,33 @@ +// OpenAI-compatible adapter (used by openai, minimax, openrouter, recraft) + +const ENDPOINTS = { + openai: "https://api.openai.com/v1/images/generations", + minimax: "https://api.minimaxi.com/v1/images/generations", + openrouter: "https://openrouter.ai/api/v1/images/generations", + recraft: "https://external.api.recraft.ai/v1/images/generations", +}; + +export default function createOpenAIAdapter(providerId) { + return { + buildUrl: () => ENDPOINTS[providerId], + buildHeaders: (creds) => { + const headers = { "Content-Type": "application/json" }; + const key = creds?.apiKey || creds?.accessToken; + if (key) 
headers["Authorization"] = `Bearer ${key}`; + if (providerId === "openrouter") { + headers["HTTP-Referer"] = "https://endpoint-proxy.local"; + headers["X-Title"] = "Endpoint Proxy"; + } + return headers; + }, + buildBody: (model, body) => { + const { prompt, n = 1, size = "1024x1024", quality, style, response_format } = body; + const req = { model, prompt, n, size }; + if (quality) req.quality = quality; + if (style) req.style = style; + if (response_format) req.response_format = response_format; + return req; + }, + normalize: (responseBody) => responseBody, + }; +} diff --git a/open-sse/handlers/imageProviders/runwayml.js b/open-sse/handlers/imageProviders/runwayml.js new file mode 100644 index 00000000..229e9a36 --- /dev/null +++ b/open-sse/handlers/imageProviders/runwayml.js @@ -0,0 +1,47 @@ +// Runway ML — async submit + /tasks/{id} polling +import { sleep, nowSec, sizeToAspectRatio, POLL_INTERVAL_MS, POLL_TIMEOUT_MS } from "./_base.js"; + +const BASE_URL = "https://api.dev.runwayml.com/v1"; + +export default { + async: true, + buildUrl: (model) => { + // Image models (gen4_image*) → text_to_image; video models → image_to_video + return `${BASE_URL}/${model.includes("image") ? "text_to_image" : "image_to_video"}`; + }, + buildHeaders: (creds) => { + const key = creds?.apiKey || creds?.accessToken; + return { + "Content-Type": "application/json", + "Authorization": `Bearer ${key}`, + "X-Runway-Version": "2024-11-06", + }; + }, + buildBody: (model, body) => { + const isVideo = !model.includes("image"); + const ratio = sizeToAspectRatio(body.size); + if (isVideo) { + return { promptText: body.prompt, model, ratio, duration: 5, ...(body.image ? { promptImage: body.image } : {}) }; + } + return { promptText: body.prompt, model, ratio, ...(body.image ? { referenceImages: [{ uri: body.image }] } : {}) }; + }, + async parseResponse(response, { headers }) { + const { id } = await response.json(); + if (!id) throw new Error("Runway: no task id returned"); + const taskUrl = `${BASE_URL}/tasks/${id}`; + const deadline = Date.now() + POLL_TIMEOUT_MS; + while (Date.now() < deadline) { + await sleep(POLL_INTERVAL_MS); + const r = await fetch(taskUrl, { headers }); + if (!r.ok) throw new Error(`Runway status ${r.status}`); + const s = await r.json(); + if (s.status === "SUCCEEDED") return s; + if (s.status === "FAILED" || s.status === "CANCELLED") throw new Error(s.failure || "Runway task failed"); + } + throw new Error("Runway polling timeout"); + }, + normalize: (responseBody) => { + const outputs = Array.isArray(responseBody.output) ? responseBody.output : []; + return { created: nowSec(), data: outputs.map((url) => ({ url })) }; + }, +}; diff --git a/open-sse/handlers/imageProviders/sdwebui.js b/open-sse/handlers/imageProviders/sdwebui.js new file mode 100644 index 00000000..ecabfe55 --- /dev/null +++ b/open-sse/handlers/imageProviders/sdwebui.js @@ -0,0 +1,17 @@ +// SD WebUI (AUTOMATIC1111) — local, noAuth +import { nowSec } from "./_base.js"; + +export default { + noAuth: true, + buildUrl: () => "http://localhost:7860/sdapi/v1/txt2img", + buildHeaders: () => ({ "Content-Type": "application/json" }), + buildBody: (_model, body) => { + const { prompt, n = 1, size = "1024x1024" } = body; + const [width, height] = size.split("x").map(Number); + return { prompt, width: width || 512, height: height || 512, steps: 20, batch_size: n }; + }, + normalize: (responseBody) => { + const images = Array.isArray(responseBody.images) ? 
responseBody.images.map((img) => ({ b64_json: img })) : []; + return { created: nowSec(), data: images }; + }, +}; diff --git a/open-sse/handlers/imageProviders/stabilityAi.js b/open-sse/handlers/imageProviders/stabilityAi.js new file mode 100644 index 00000000..f5f3fe83 --- /dev/null +++ b/open-sse/handlers/imageProviders/stabilityAi.js @@ -0,0 +1,34 @@ +// Stability AI v2 — sync, returns { image: "" } +import { nowSec, sizeToAspectRatio } from "./_base.js"; + +const BASE_URL = "https://api.stability.ai/v2beta/stable-image/generate"; + +// Map model id → endpoint segment +function modelToEndpoint(model) { + if (model.includes("ultra")) return "ultra"; + if (model.includes("sd3")) return "sd3"; + return "core"; +} + +export default { + buildUrl: (model) => `${BASE_URL}/${modelToEndpoint(model)}`, + buildHeaders: (creds) => { + const key = creds?.apiKey || creds?.accessToken; + return { + "Content-Type": "application/json", + "Authorization": `Bearer ${key}`, + "Accept": "application/json", + }; + }, + buildBody: (model, body) => { + const req = { prompt: body.prompt, output_format: (body.output_format || "png").toLowerCase() }; + if (body.size) req.aspect_ratio = sizeToAspectRatio(body.size); + if (body.style) req.style_preset = body.style; + if (model.includes("sd3")) req.model = model; + return req; + }, + normalize: (responseBody) => { + if (responseBody.image) return { created: nowSec(), data: [{ b64_json: responseBody.image }] }; + return { created: nowSec(), data: [] }; + }, +}; diff --git a/open-sse/handlers/ttsCore.js b/open-sse/handlers/ttsCore.js index 233719f4..ff8c52a9 100644 --- a/open-sse/handlers/ttsCore.js +++ b/open-sse/handlers/ttsCore.js @@ -1,12 +1,15 @@ +import { Buffer } from "node:buffer"; import { createErrorResult } from "../utils/error.js"; import { HTTP_STATUS } from "../config/runtimeConfig.js"; -import { execFile } from "child_process"; -import { promisify } from "util"; -import { mkdtemp, readFile, rm } from "fs/promises"; -import { tmpdir } from "os"; -import { join } from "path"; +import { getTtsAdapter, synthesizeViaConfig } from "./ttsProviders/index.js"; -const execFileAsync = promisify(execFile); +// Re-export voice fetchers + voices APIs for backward compat with existing routes +export { + VOICE_FETCHERS, + fetchEdgeTtsVoices, + fetchLocalDeviceVoices, + fetchElevenLabsVoices, +} from "./ttsProviders/index.js"; // ── Response Formatter (DRY) ─────────────────────────────────── function createTtsResponse(base64Audio, format, responseFormat) { @@ -25,7 +28,7 @@ function createTtsResponse(base64Audio, format, responseFormat) { }; } - // Binary format (default): return raw MP3 + // Binary format (default): return raw audio return { success: true, response: new Response(audioBuffer, { @@ -38,691 +41,11 @@ function createTtsResponse(base64Audio, format, responseFormat) { }; } -// ── Token cache per engine ───────────────────────────────────── -const cache = { - google: { token: null, tokenTime: 0 }, - bing: { token: null, tokenTime: 0 }, -}; - -const GOOGLE_REFRESH = 11 * 60 * 1000; -const BING_REFRESH = 5 * 60 * 1000; // conservative: token TTL is 1h but refresh early - -const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"; -const SEC_CH_HEADERS = { - "sec-ch-ua": '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"', - "sec-ch-ua-arch": '"arm"', - "sec-ch-ua-bitness": '"64"', - "sec-ch-ua-full-version": '"146.0.7680.178"', - "sec-ch-ua-full-version-list": 
'"Chromium";v="146.0.7680.178", "Not-A.Brand";v="24.0.0.0", "Google Chrome";v="146.0.7680.178"', - "sec-ch-ua-mobile": "?0", - "sec-ch-ua-model": '""', - "sec-ch-ua-platform": '"macOS"', - "sec-ch-ua-platform-version": '"15.1.0"', -}; - -// ── Google TTS ───────────────────────────────────────────────── -async function getGoogleToken() { - const now = Date.now(); - if (cache.google.token && now - cache.google.tokenTime < GOOGLE_REFRESH) { - return cache.google.token; - } - const res = await fetch("https://translate.google.com/", { - headers: { "User-Agent": UA }, - }); - if (!res.ok) throw new Error(`Google translate fetch failed: ${res.status}`); - const html = await res.text(); - const fSid = html.match(/"FdrFJe":"(.*?)"/)?.[ 1]; - const bl = html.match(/"cfb2h":"(.*?)"/)?.[ 1]; - if (!fSid || !bl) throw new Error("Failed to parse Google token"); - cache.google.token = { "f.sid": fSid, bl }; - cache.google.tokenTime = now; - return cache.google.token; -} - -let _googleIdx = 0; -async function googleTts(text, lang) { - const token = await getGoogleToken(); - const cleanText = text.replace(/[@^*()\\/\-_+=><"'\u201c\u201d\u3010\u3011]/g, " ").replaceAll(", ", ". "); - const rpcId = "jQ1olc"; - const reqId = (++_googleIdx * 100000) + Math.floor(1000 + Math.random() * 9000); - const query = new URLSearchParams({ - rpcids: rpcId, - "f.sid": token["f.sid"], - bl: token.bl, - hl: lang, - "soc-app": 1, "soc-platform": 1, "soc-device": 1, - _reqid: reqId, - rt: "c", - }); - const payload = [cleanText, lang, null, "undefined", [0]]; - const body = new URLSearchParams(); - body.append("f.req", JSON.stringify([[[rpcId, JSON.stringify(payload), null, "generic"]]])); - const res = await fetch(`https://translate.google.com/_/TranslateWebserverUi/data/batchexecute?${query}`, { - method: "POST", - headers: { "Content-Type": "application/x-www-form-urlencoded", "Referer": "https://translate.google.com/" }, - body: body.toString(), - }); - if (!res.ok) throw new Error(`Google TTS failed: ${res.status}`); - const data = await res.text(); - const split = JSON.parse(data.split("\n")[3]); - const base64 = JSON.parse(split[0][2])[0]; - if (!base64 || base64.length < 100) throw new Error("Google TTS returned empty audio"); - return base64; // base64 MP3 -} - -// ── Bing TTS ─────────────────────────────────────────────────── -async function getBingToken() { - const now = Date.now(); - if (cache.bing.token && now - cache.bing.tokenTime < BING_REFRESH) { - return cache.bing.token; - } - const res = await fetch("https://www.bing.com/translator", { - headers: { "User-Agent": UA, "Accept-Language": "vi,en-US;q=0.9,en;q=0.8" }, - }); - if (!res.ok) throw new Error(`Bing translator fetch failed: ${res.status}`); - const rawCookies = res.headers.getSetCookie?.() || []; - const cookie = rawCookies.map((c) => c.split(";")[0]).join("; "); - const html = await res.text(); - const match = html.match(/params_AbusePreventionHelper\s*=\s*\[([^,]+),([^,]+),/); - if (!match) throw new Error("Failed to parse Bing token"); - cache.bing.token = { key: match[1], token: match[2].replace(/"/g, ""), cookie }; - cache.bing.tokenTime = now; - return cache.bing.token; -} - -async function bingTtsRequest(text, voiceId, token) { - const parts = voiceId.split("-"); - const xmlLang = parts.slice(0, 2).join("-"); - const gender = voiceId.toLowerCase().includes("male") ? 
"Male" : "Female"; - const ssml = `${text}`; - const body = new URLSearchParams(); - body.append("ssml", ssml); - body.append("token", token.token); - body.append("key", token.key); - return fetch("https://www.bing.com/tfettts?isVertical=1&&IG=1&IID=translator.5023&SFX=1", { - method: "POST", - body: body.toString(), - headers: { - "Content-Type": "application/x-www-form-urlencoded", - "Accept": "*/*", - "Origin": "https://www.bing.com", - "Referer": "https://www.bing.com/translator", - "User-Agent": UA, - ...(token.cookie ? { "Cookie": token.cookie } : {}), - }, - }); -} - -async function bingTts(text, voiceId) { - let token = await getBingToken(); - let res = await bingTtsRequest(text, voiceId, token); - - // On 429/captcha: invalidate cache and retry once with fresh token - if (res.status === 429 || res.status === 403) { - cache.bing.token = null; - cache.bing.tokenTime = 0; - token = await getBingToken(); - res = await bingTtsRequest(text, voiceId, token); - } - - if (!res.ok) { - const body = await res.text().catch(() => ""); - throw new Error(`Bing TTS failed: ${res.status}${body ? " - " + body : ""}`); - } - const buf = await res.arrayBuffer(); - if (buf.byteLength < 1024) throw new Error("Bing TTS returned empty audio"); - return Buffer.from(buf).toString("base64"); // base64 MP3 -} - -// ── Local Device TTS (macOS `say` + Windows SAPI + ffmpeg) ────── -let _localVoicesCache = null; - -async function fetchLocalDeviceVoicesMac() { - const { stdout } = await execFileAsync("say", ["-v", "?"]); - const voices = []; - for (const line of stdout.split("\n")) { - // Format: "Name locale # sample" - const m = line.match(/^([^\s].*?)\s{2,}([a-z]{2}_[A-Z]{2})/); - if (!m) continue; - const name = m[1].trim(); - const locale = m[2].trim(); // e.g. en_US - const lang = locale.split("_")[0]; - const country = locale.split("_")[1]; - voices.push({ id: name, name, locale, lang, country, gender: "" }); - } - return voices; -} - -async function fetchLocalDeviceVoicesWin() { - // Use -WindowStyle Hidden to suppress PowerShell popup window - const script = [ - "Add-Type -AssemblyName System.Speech;", - "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer;", - "$s.GetInstalledVoices() | ForEach-Object { $v = $_.VoiceInfo;", - "[PSCustomObject]@{ Name=$v.Name; Culture=$v.Culture.Name; Gender=$v.Gender } }", - "| ConvertTo-Json -Compress", - ].join(" "); - const { stdout } = await execFileAsync( - "powershell.exe", - ["-NoProfile", "-NonInteractive", "-WindowStyle", "Hidden", "-Command", script], - { windowsHide: true } - ); - const raw = JSON.parse(stdout.trim() || "[]"); - // Normalize: single object → array - const list = Array.isArray(raw) ? raw : [raw]; - return list.map((v) => { - const culture = v.Culture || "en-US"; - const [lang, country = ""] = culture.split("-"); - // Gender: 0=NotSet, 1=Male, 2=Female (SAPI enum) - const genderMap = { 1: "Male", 2: "Female", Male: "Male", Female: "Female" }; - return { - id: v.Name, - name: v.Name, - locale: culture.replace("-", "_"), - lang, - country, - gender: genderMap[v.Gender] || "", - }; - }); -} - -export async function fetchLocalDeviceVoices() { - if (_localVoicesCache) return _localVoicesCache; - try { - const voices = process.platform === "win32" - ? 
await fetchLocalDeviceVoicesWin() - : await fetchLocalDeviceVoicesMac(); - _localVoicesCache = voices; - return voices; - } catch { - return []; - } -} - -async function localDeviceTts(text, voiceId) { - const dir = await mkdtemp(join(tmpdir(), "tts-")); - const aiffPath = join(dir, "out.aiff"); - const mp3Path = join(dir, "out.mp3"); - try { - const args = voiceId ? ["-v", voiceId, "-o", aiffPath, text] : ["-o", aiffPath, text]; - await execFileAsync("say", args); - await execFileAsync("ffmpeg", ["-y", "-i", aiffPath, "-codec:a", "libmp3lame", "-qscale:a", "4", mp3Path]); - const buf = await readFile(mp3Path); - return buf.toString("base64"); - } finally { - await rm(dir, { recursive: true, force: true }); - } -} - -// ── Voices list (Edge TTS public endpoint) ───────────────────── -let _voicesCache = null; -let _voicesCacheTime = 0; -const VOICES_TTL = 24 * 60 * 60 * 1000; - -export async function fetchEdgeTtsVoices() { - const now = Date.now(); - if (_voicesCache && now - _voicesCacheTime < VOICES_TTL) return _voicesCache; - const res = await fetch( - "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4", - { headers: { "User-Agent": UA } } - ); - if (!res.ok) throw new Error(`Edge TTS voices fetch failed: ${res.status}`); - const voices = await res.json(); - _voicesCache = voices; - _voicesCacheTime = now; - return voices; -} - -// ── ElevenLabs TTS ───────────────────────────────────────────── -const _elevenlabsVoicesCache = new Map(); // Cache by API key - -export async function fetchElevenLabsVoices(apiKey) { - if (!apiKey) throw new Error("ElevenLabs API key required"); - - const now = Date.now(); - const cached = _elevenlabsVoicesCache.get(apiKey); - if (cached && now - cached.time < VOICES_TTL) { - return cached.voices; - } - - const res = await fetch("https://api.elevenlabs.io/v1/voices", { - headers: { - "xi-api-key": apiKey, - "Content-Type": "application/json", - }, - }); - - if (!res.ok) throw new Error(`ElevenLabs voices fetch failed: ${res.status}`); - const data = await res.json(); - // Normalize: add lang from labels.language for grouping - const voices = (data.voices || []).map((v) => ({ - ...v, - lang: v.labels?.language || "en", - })); - _elevenlabsVoicesCache.set(apiKey, { voices, time: now }); - return voices; -} - -async function elevenlabsTts(text, voiceId, apiKey, modelId = "eleven_flash_v2_5") { - const res = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, { - method: "POST", - headers: { - "xi-api-key": apiKey, - "Content-Type": "application/json", - }, - body: JSON.stringify({ - text, - model_id: modelId, - voice_settings: { - stability: 0.5, - similarity_boost: 0.75, - }, - }), - }); - - if (!res.ok) { - const err = await res.json().catch(() => ({})); - throw new Error(err?.detail?.message || `ElevenLabs TTS failed: ${res.status}`); - } - - const buf = await res.arrayBuffer(); - if (buf.byteLength < 1024) throw new Error("ElevenLabs TTS returned empty audio"); - return Buffer.from(buf).toString("base64"); -} - -// ── Voice Fetcher Registry (DRY) ─────────────────────────────── -export const VOICE_FETCHERS = { - "edge-tts": fetchEdgeTtsVoices, - "local-device": fetchLocalDeviceVoices, - "elevenlabs": fetchElevenLabsVoices, - // google-tts: uses hardcoded language codes - // openai: uses hardcoded voices from providerModels.js -}; - -// ── OpenRouter TTS (via chat completions + audio modality) ─────────────────── -async function handleOpenRouterTts({ model, 
input, credentials, responseFormat = "mp3" }) { - if (!credentials?.apiKey) { - return createErrorResult(HTTP_STATUS.UNAUTHORIZED, "No OpenRouter API key configured"); - } - - // model format: "tts-model/voice" e.g. "openai/gpt-4o-mini-tts/alloy" - let ttsModel = "openai/gpt-4o-mini-tts"; - let voice = "alloy"; - if (model && model.includes("/")) { - const lastSlash = model.lastIndexOf("/"); - const maybVoice = model.slice(lastSlash + 1); - const maybeModel = model.slice(0, lastSlash); - // voice names are simple lowercase words, model names contain "/" - if (maybeModel.includes("/")) { - ttsModel = maybeModel; - voice = maybVoice; - } else { - voice = model; - } - } else if (model) { - voice = model; - } - - const res = await fetch("https://openrouter.ai/api/v1/chat/completions", { - method: "POST", - headers: { - "Content-Type": "application/json", - "Authorization": `Bearer ${credentials.apiKey}`, - "HTTP-Referer": "https://endpoint-proxy.local", - "X-Title": "Endpoint Proxy", - }, - body: JSON.stringify({ - model: ttsModel, - modalities: ["text", "audio"], - audio: { voice, format: "wav" }, - stream: true, - messages: [{ role: "user", content: input }], - }), - }); - - if (!res.ok) { - const err = await res.json().catch(() => ({})); - return createErrorResult(res.status, err?.error?.message || `OpenRouter TTS failed: ${res.status}`); - } - - // Parse SSE stream, accumulate base64 audio chunks - const chunks = []; - const reader = res.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ""; - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - buffer += decoder.decode(value, { stream: true }); - - const lines = buffer.split("\n"); - buffer = lines.pop(); - - for (const line of lines) { - if (!line.startsWith("data: ") || line === "data: [DONE]") continue; - try { - const json = JSON.parse(line.slice(6)); - const audioData = json.choices?.[0]?.delta?.audio?.data; - if (audioData) chunks.push(audioData); - } catch {} - } - } - - if (chunks.length === 0) { - return createErrorResult(HTTP_STATUS.BAD_GATEWAY, "OpenRouter TTS returned no audio data"); - } - - const base64Audio = chunks.join(""); - return createTtsResponse(base64Audio, "wav", responseFormat); -} - -// ── OpenAI TTS ─────────────────────────────────────────────────────────────── -async function handleOpenAiTts({ model, input, credentials, responseFormat = "mp3" }) { - if (!credentials?.apiKey) { - return createErrorResult(HTTP_STATUS.UNAUTHORIZED, "No OpenAI API key configured"); - } - - // model format: "tts-model/voice" e.g. 
"tts-1/alloy" or "gpt-4o-mini-tts/nova" - let ttsModel = "gpt-4o-mini-tts"; - let voice = "alloy"; - if (model && model.includes("/")) { - const parts = model.split("/"); - if (parts.length === 2) { - [ttsModel, voice] = parts; - } - } else if (model) { - voice = model; - } - - const baseUrl = (credentials.baseUrl || "https://api.openai.com").replace(/\/+$/, ""); - const res = await fetch(`${baseUrl}/v1/audio/speech`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "Authorization": `Bearer ${credentials.apiKey}`, - }, - body: JSON.stringify({ model: ttsModel, voice, input }), - }); - - if (!res.ok) { - const err = await res.json().catch(() => ({})); - return createErrorResult(res.status, err?.error?.message || `OpenAI TTS failed: ${res.status}`); - } - - const buf = await res.arrayBuffer(); - const base64 = Buffer.from(buf).toString("base64"); - return createTtsResponse(base64, "mp3", responseFormat); -} - -// ── Generic TTS Format Handlers (config-driven via ttsConfig.format) ────── -// Parse `model` string as "modelId/voiceId" or "modelId" (modelId may contain slashes — match against known list) -function parseModelVoice(model, defaultModel = "", defaultVoice = "", knownModels = []) { - if (!model) return { modelId: defaultModel, voiceId: defaultVoice }; - // Find longest known model id that prefixes `model` - const known = knownModels.map((m) => m.id || m).filter(Boolean).sort((a, b) => b.length - a.length); - for (const id of known) { - if (model === id) return { modelId: id, voiceId: defaultVoice }; - if (model.startsWith(`${id}/`)) return { modelId: id, voiceId: model.slice(id.length + 1) }; - } - // Fallback: split on last "/" so "vendor/model/voice" → model="vendor/model", voice="voice" - const idx = model.lastIndexOf("/"); - if (idx > 0) return { modelId: model.slice(0, idx), voiceId: model.slice(idx + 1) }; - return { modelId: defaultModel || model, voiceId: defaultVoice || model }; -} - -// Convert upstream Response (binary audio) to { base64, format } -async function responseToBase64(res, defaultFormat = "mp3") { - const buf = await res.arrayBuffer(); - if (buf.byteLength < 100) throw new Error("Upstream returned empty audio"); - const ctype = res.headers.get("content-type") || ""; - let format = defaultFormat; - if (ctype.includes("wav")) format = "wav"; - else if (ctype.includes("mpeg") || ctype.includes("mp3")) format = "mp3"; - else if (ctype.includes("ogg")) format = "ogg"; - return { base64: Buffer.from(buf).toString("base64"), format }; -} - -async function throwUpstreamError(res) { - const text = await res.text().catch(() => ""); - let msg = `Upstream error (${res.status})`; - try { - const parsed = JSON.parse(text); - msg = parsed?.error?.message || parsed?.message || parsed?.detail?.message || (typeof parsed?.detail === "string" ? 
parsed.detail : null) || text || msg; - } catch { msg = text || msg; } - throw new Error(msg); -} - -// Hyperbolic: POST { text } → { audio: base64 } -async function ttsHyperbolic({ baseUrl, apiKey, text }) { - const res = await fetch(baseUrl, { - method: "POST", - headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, - body: JSON.stringify({ text }), - }); - if (!res.ok) await throwUpstreamError(res); - const data = await res.json(); - return { base64: data.audio, format: "mp3" }; -} - -// Deepgram: model via query, Token auth, returns binary -async function ttsDeepgram({ baseUrl, apiKey, text, modelId }) { - const url = new URL(baseUrl); - url.searchParams.set("model", modelId || "aura-asteria-en"); - const res = await fetch(url.toString(), { - method: "POST", - headers: { "Content-Type": "application/json", "Authorization": `Token ${apiKey}` }, - body: JSON.stringify({ text }), - }); - if (!res.ok) await throwUpstreamError(res); - return responseToBase64(res, "mp3"); -} - -// Nvidia NIM: POST { input: { text }, voice, model } → binary -async function ttsNvidia({ baseUrl, apiKey, text, modelId, voiceId }) { - const res = await fetch(baseUrl, { - method: "POST", - headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, - body: JSON.stringify({ input: { text }, voice: voiceId || "default", model: modelId }), - }); - if (!res.ok) await throwUpstreamError(res); - return responseToBase64(res, "wav"); -} - -// HuggingFace: POST {baseUrl}/{modelId} { inputs: text } → binary -async function ttsHuggingFace({ baseUrl, apiKey, text, modelId }) { - if (!modelId || modelId.includes("..")) throw new Error("Invalid HuggingFace model ID"); - const res = await fetch(`${baseUrl}/${modelId}`, { - method: "POST", - headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, - body: JSON.stringify({ inputs: text }), - }); - if (!res.ok) await throwUpstreamError(res); - return responseToBase64(res, "wav"); -} - -// Inworld: POST { text, voiceId, modelId, audioConfig } → JSON { audioContent } -async function ttsInworld({ baseUrl, apiKey, text, modelId, voiceId }) { - const res = await fetch(baseUrl, { - method: "POST", - headers: { "Content-Type": "application/json", "Authorization": `Basic ${apiKey}` }, - body: JSON.stringify({ - text, - voiceId: voiceId || "Alex", - modelId: modelId || "inworld-tts-1.5-mini", - audioConfig: { audioEncoding: "MP3" }, - }), - }); - if (!res.ok) await throwUpstreamError(res); - const data = await res.json(); - if (!data.audioContent) throw new Error("Inworld TTS returned no audio"); - return { base64: data.audioContent, format: "mp3" }; -} - -// Cartesia: POST { model_id, transcript, voice, output_format } → binary -async function ttsCartesia({ baseUrl, apiKey, text, modelId, voiceId }) { - const res = await fetch(baseUrl, { - method: "POST", - headers: { - "Content-Type": "application/json", - "X-API-Key": apiKey, - "Cartesia-Version": "2024-06-10", - }, - body: JSON.stringify({ - model_id: modelId || "sonic-2", - transcript: text, - ...(voiceId ? 
{ voice: { mode: "id", id: voiceId } } : {}), - output_format: { container: "mp3", bit_rate: 128000, sample_rate: 44100 }, - }), - }); - if (!res.ok) await throwUpstreamError(res); - return responseToBase64(res, "mp3"); -} - -// PlayHT: token format "userId:apiKey", voice = s3 URL -async function ttsPlayHt({ baseUrl, apiKey, text, modelId, voiceId }) { - const [userId, key] = (apiKey || ":").split(":"); - const res = await fetch(baseUrl, { - method: "POST", - headers: { - "Content-Type": "application/json", - "Accept": "audio/mpeg", - "X-USER-ID": userId || "", - "Authorization": `Bearer ${key || apiKey}`, - }, - body: JSON.stringify({ - text, - voice: voiceId || "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json", - voice_engine: modelId || "PlayDialog", - output_format: "mp3", - speed: 1, - }), - }); - if (!res.ok) await throwUpstreamError(res); - return responseToBase64(res, "mp3"); -} - -// Coqui (local, noAuth): POST { text, speaker_id } → WAV -async function ttsCoqui({ baseUrl, text, voiceId }) { - const res = await fetch(baseUrl, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ text, ...(voiceId ? { speaker_id: voiceId } : {}) }), - }); - if (!res.ok) await throwUpstreamError(res); - return responseToBase64(res, "wav"); -} - -// Tortoise (local, noAuth): POST { text, voice } → binary -async function ttsTortoise({ baseUrl, text, voiceId }) { - const res = await fetch(baseUrl, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ text, voice: voiceId || "random" }), - }); - if (!res.ok) await throwUpstreamError(res); - return responseToBase64(res, "wav"); -} - -// OpenAI-compatible (qwen3-tts, openai-compat): POST { model, input, voice } → binary -async function ttsOpenAiCompat({ baseUrl, apiKey, text, modelId, voiceId }) { - const headers = { "Content-Type": "application/json" }; - if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`; - const res = await fetch(baseUrl, { - method: "POST", - headers, - body: JSON.stringify({ - model: modelId, - input: text, - voice: voiceId || "alloy", - response_format: "mp3", - speed: 1.0, - }), - }); - if (!res.ok) await throwUpstreamError(res); - return responseToBase64(res, "mp3"); -} - -// Format → handler dispatcher (DRY) -const FORMAT_HANDLERS = { - hyperbolic: ttsHyperbolic, - deepgram: ttsDeepgram, - "nvidia-tts": ttsNvidia, - "huggingface-tts": ttsHuggingFace, - inworld: ttsInworld, - cartesia: ttsCartesia, - playht: ttsPlayHt, - coqui: ttsCoqui, - tortoise: ttsTortoise, - openai: ttsOpenAiCompat, -}; - -// ── TTS Provider Registry (legacy noAuth + special providers) ────────── -const TTS_PROVIDERS = { - "google-tts": { - synthesize: async (text, model) => { - const base64 = await googleTts(text, model || "en"); - return { base64, format: "mp3" }; - }, - requiresCredentials: false, - }, - "edge-tts": { - synthesize: async (text, model) => { - const base64 = await bingTts(text, model || "vi-VN-HoaiMyNeural"); - return { base64, format: "mp3" }; - }, - requiresCredentials: false, - }, - "local-device": { - synthesize: async (text, model) => { - const base64 = await localDeviceTts(text, model); - return { base64, format: "mp3" }; - }, - requiresCredentials: false, - }, - "elevenlabs": { - synthesize: async (text, model, credentials) => { - if (!credentials?.apiKey) throw new Error("ElevenLabs API key required"); - let modelId = "eleven_flash_v2_5"; - let voiceId = model; - if (model && model.includes("/")) 
[modelId, voiceId] = model.split("/"); - const base64 = await elevenlabsTts(text, voiceId, credentials.apiKey, modelId); - return { base64, format: "mp3" }; - }, - requiresCredentials: true, - }, - "openai": { - synthesize: async (text, model, credentials, responseFormat) => { - return await handleOpenAiTts({ model, input: text, credentials, responseFormat }); - }, - requiresCredentials: true, - }, - "openrouter": { - synthesize: async (text, model, credentials, responseFormat) => { - return await handleOpenRouterTts({ model, input: text, credentials, responseFormat }); - }, - requiresCredentials: true, - }, -}; - -// ── Generic dispatcher: providers with ttsConfig.format ──────────────── -// Resolves to TTS_PROVIDERS first; falls back to ttsConfig.format dispatch. -async function synthesizeViaConfig(provider, text, model, credentials) { - const { AI_PROVIDERS } = await import("@/shared/constants/providers"); - const cfg = AI_PROVIDERS[provider]?.ttsConfig; - if (!cfg) return null; - const handler = FORMAT_HANDLERS[cfg.format]; - if (!handler) return null; - const apiKey = credentials?.apiKey; - if (cfg.authType !== "none" && !apiKey) throw new Error(`${provider} API key required`); - const defaultModel = cfg.models?.[0]?.id || ""; - const { modelId, voiceId } = parseModelVoice(model, defaultModel, "", cfg.models || []); - return handler({ baseUrl: cfg.baseUrl, apiKey, text, modelId, voiceId }); -} - // ── Core handler ─────────────────────────────────────────────── /** - * Synthesize text to audio. + * Synthesize text to audio. Provider logic lives in `./ttsProviders/{id}.js` + * or is dispatched generically via `ttsConfig.format`. + * * @returns {Promise<{success, response, status?, error?}>} */ export async function handleTtsCore({ provider, model, input, credentials, responseFormat = "mp3" }) { @@ -730,17 +53,17 @@ export async function handleTtsCore({ provider, model, input, credentials, respo return createErrorResult(HTTP_STATUS.BAD_REQUEST, "Missing required field: input"); } - const ttsProvider = TTS_PROVIDERS[provider]; - try { - // Legacy/special providers (google-tts, edge-tts, local-device, elevenlabs, openai, openrouter) - if (ttsProvider) { - const result = await ttsProvider.synthesize(input.trim(), model, credentials, responseFormat); + // Special-case adapters (google-tts, edge-tts, local-device, elevenlabs, openai, openrouter) + const adapter = getTtsAdapter(provider); + if (adapter) { + const result = await adapter.synthesize(input.trim(), model, credentials, responseFormat); + // Adapter may return a full {success, response} (legacy) or {base64, format} if (result.success !== undefined) return result; return createTtsResponse(result.base64, result.format, responseFormat); } - // Generic config-driven dispatcher (hyperbolic, deepgram, nvidia, huggingface, inworld, cartesia, playht, coqui, tortoise, qwen, ...) + // Generic config-driven (hyperbolic, deepgram, nvidia, huggingface, inworld, cartesia, playht, coqui, tortoise, qwen, ...) 
const result = await synthesizeViaConfig(provider, input.trim(), model, credentials); if (result) return createTtsResponse(result.base64, result.format, responseFormat); diff --git a/open-sse/handlers/ttsProviders/_base.js b/open-sse/handlers/ttsProviders/_base.js new file mode 100644 index 00000000..1c922118 --- /dev/null +++ b/open-sse/handlers/ttsProviders/_base.js @@ -0,0 +1,39 @@ +// Shared TTS helpers +import { Buffer } from "node:buffer"; + +export const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"; + +// Convert upstream Response (binary audio) to { base64, format } +export async function responseToBase64(res, defaultFormat = "mp3") { + const buf = await res.arrayBuffer(); + if (buf.byteLength < 100) throw new Error("Upstream returned empty audio"); + const ctype = res.headers.get("content-type") || ""; + let format = defaultFormat; + if (ctype.includes("wav")) format = "wav"; + else if (ctype.includes("mpeg") || ctype.includes("mp3")) format = "mp3"; + else if (ctype.includes("ogg")) format = "ogg"; + return { base64: Buffer.from(buf).toString("base64"), format }; +} + +export async function throwUpstreamError(res) { + const text = await res.text().catch(() => ""); + let msg = `Upstream error (${res.status})`; + try { + const parsed = JSON.parse(text); + msg = parsed?.error?.message || parsed?.message || parsed?.detail?.message || (typeof parsed?.detail === "string" ? parsed.detail : null) || text || msg; + } catch { msg = text || msg; } + throw new Error(msg); +} + +// Parse `model` string as "modelId/voiceId" — match against known model list (longest prefix wins) +export function parseModelVoice(model, defaultModel = "", defaultVoice = "", knownModels = []) { + if (!model) return { modelId: defaultModel, voiceId: defaultVoice }; + const known = knownModels.map((m) => m.id || m).filter(Boolean).sort((a, b) => b.length - a.length); + for (const id of known) { + if (model === id) return { modelId: id, voiceId: defaultVoice }; + if (model.startsWith(`${id}/`)) return { modelId: id, voiceId: model.slice(id.length + 1) }; + } + const idx = model.lastIndexOf("/"); + if (idx > 0) return { modelId: model.slice(0, idx), voiceId: model.slice(idx + 1) }; + return { modelId: defaultModel || model, voiceId: defaultVoice || model }; +} diff --git a/open-sse/handlers/ttsProviders/edgeTts.js b/open-sse/handlers/ttsProviders/edgeTts.js new file mode 100644 index 00000000..66b1dd9e --- /dev/null +++ b/open-sse/handlers/ttsProviders/edgeTts.js @@ -0,0 +1,89 @@ +// Microsoft Edge / Bing TTS (no auth) — via Bing translator endpoint +import { Buffer } from "node:buffer"; +import { UA } from "./_base.js"; + +const REFRESH_MS = 5 * 60 * 1000; // token TTL ~1h, refresh early +const VOICES_TTL = 24 * 60 * 60 * 1000; + +const cache = { token: null, tokenTime: 0 }; +let _voicesCache = null; +let _voicesCacheTime = 0; + +async function getToken() { + const now = Date.now(); + if (cache.token && now - cache.tokenTime < REFRESH_MS) return cache.token; + const res = await fetch("https://www.bing.com/translator", { + headers: { "User-Agent": UA, "Accept-Language": "vi,en-US;q=0.9,en;q=0.8" }, + }); + if (!res.ok) throw new Error(`Bing translator fetch failed: ${res.status}`); + const rawCookies = res.headers.getSetCookie?.() || []; + const cookie = rawCookies.map((c) => c.split(";")[0]).join("; "); + const html = await res.text(); + const match = html.match(/params_AbusePreventionHelper\s*=\s*\[([^,]+),([^,]+),/); + if (!match) throw 
new Error("Failed to parse Bing token"); + cache.token = { key: match[1], token: match[2].replace(/"/g, ""), cookie }; + cache.tokenTime = now; + return cache.token; +} + +async function ttsRequest(text, voiceId, token) { + const parts = voiceId.split("-"); + const xmlLang = parts.slice(0, 2).join("-"); + const gender = voiceId.toLowerCase().includes("male") ? "Male" : "Female"; + const ssml = `${text}`; + const body = new URLSearchParams(); + body.append("ssml", ssml); + body.append("token", token.token); + body.append("key", token.key); + return fetch("https://www.bing.com/tfettts?isVertical=1&&IG=1&IID=translator.5023&SFX=1", { + method: "POST", + body: body.toString(), + headers: { + "Content-Type": "application/x-www-form-urlencoded", + "Accept": "*/*", + "Origin": "https://www.bing.com", + "Referer": "https://www.bing.com/translator", + "User-Agent": UA, + ...(token.cookie ? { "Cookie": token.cookie } : {}), + }, + }); +} + +export async function fetchEdgeTtsVoices() { + const now = Date.now(); + if (_voicesCache && now - _voicesCacheTime < VOICES_TTL) return _voicesCache; + const res = await fetch( + "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4", + { headers: { "User-Agent": UA } } + ); + if (!res.ok) throw new Error(`Edge TTS voices fetch failed: ${res.status}`); + const voices = await res.json(); + _voicesCache = voices; + _voicesCacheTime = now; + return voices; +} + +export default { + noAuth: true, + async synthesize(text, model) { + const voiceId = model || "vi-VN-HoaiMyNeural"; + let token = await getToken(); + let res = await ttsRequest(text, voiceId, token); + + // 429/403: invalidate cache and retry once + if (res.status === 429 || res.status === 403) { + cache.token = null; + cache.tokenTime = 0; + token = await getToken(); + res = await ttsRequest(text, voiceId, token); + } + + if (!res.ok) { + const body = await res.text().catch(() => ""); + throw new Error(`Bing TTS failed: ${res.status}${body ? 
" - " + body : ""}`); + } + const buf = await res.arrayBuffer(); + if (buf.byteLength < 1024) throw new Error("Bing TTS returned empty audio"); + return { base64: Buffer.from(buf).toString("base64"), format: "mp3" }; + }, +}; diff --git a/open-sse/handlers/ttsProviders/elevenlabs.js b/open-sse/handlers/ttsProviders/elevenlabs.js new file mode 100644 index 00000000..711a5672 --- /dev/null +++ b/open-sse/handlers/ttsProviders/elevenlabs.js @@ -0,0 +1,48 @@ +// ElevenLabs TTS — voice id with optional model_id prefix +import { Buffer } from "node:buffer"; + +const VOICES_TTL = 24 * 60 * 60 * 1000; +const _voicesCache = new Map(); // by API key + +export async function fetchElevenLabsVoices(apiKey) { + if (!apiKey) throw new Error("ElevenLabs API key required"); + const now = Date.now(); + const cached = _voicesCache.get(apiKey); + if (cached && now - cached.time < VOICES_TTL) return cached.voices; + + const res = await fetch("https://api.elevenlabs.io/v1/voices", { + headers: { "xi-api-key": apiKey, "Content-Type": "application/json" }, + }); + if (!res.ok) throw new Error(`ElevenLabs voices fetch failed: ${res.status}`); + const data = await res.json(); + // Normalize: derive lang from labels for grouping + const voices = (data.voices || []).map((v) => ({ ...v, lang: v.labels?.language || "en" })); + _voicesCache.set(apiKey, { voices, time: now }); + return voices; +} + +export default { + async synthesize(text, model, credentials) { + if (!credentials?.apiKey) throw new Error("ElevenLabs API key required"); + let modelId = "eleven_flash_v2_5"; + let voiceId = model; + if (model && model.includes("/")) [modelId, voiceId] = model.split("/"); + + const res = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, { + method: "POST", + headers: { "xi-api-key": credentials.apiKey, "Content-Type": "application/json" }, + body: JSON.stringify({ + text, + model_id: modelId, + voice_settings: { stability: 0.5, similarity_boost: 0.75 }, + }), + }); + if (!res.ok) { + const err = await res.json().catch(() => ({})); + throw new Error(err?.detail?.message || `ElevenLabs TTS failed: ${res.status}`); + } + const buf = await res.arrayBuffer(); + if (buf.byteLength < 1024) throw new Error("ElevenLabs TTS returned empty audio"); + return { base64: Buffer.from(buf).toString("base64"), format: "mp3" }; + }, +}; diff --git a/open-sse/handlers/ttsProviders/genericFormats.js b/open-sse/handlers/ttsProviders/genericFormats.js new file mode 100644 index 00000000..148d5a1f --- /dev/null +++ b/open-sse/handlers/ttsProviders/genericFormats.js @@ -0,0 +1,167 @@ +// Generic config-driven TTS handlers — dispatched by ttsConfig.format. +// Each handler accepts { baseUrl, apiKey, text, modelId, voiceId } and returns { base64, format }. 
+import { responseToBase64, throwUpstreamError } from "./_base.js"; + +// Hyperbolic: POST { text } → { audio: base64 } +async function hyperbolic({ baseUrl, apiKey, text }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, + body: JSON.stringify({ text }), + }); + if (!res.ok) await throwUpstreamError(res); + const data = await res.json(); + return { base64: data.audio, format: "mp3" }; +} + +// Deepgram: model via query, Token auth, returns binary +async function deepgram({ baseUrl, apiKey, text, modelId }) { + const url = new URL(baseUrl); + url.searchParams.set("model", modelId || "aura-asteria-en"); + const res = await fetch(url.toString(), { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Token ${apiKey}` }, + body: JSON.stringify({ text }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "mp3"); +} + +// Nvidia NIM: POST { input: { text }, voice, model } → binary +async function nvidia({ baseUrl, apiKey, text, modelId, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, + body: JSON.stringify({ input: { text }, voice: voiceId || "default", model: modelId }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "wav"); +} + +// HuggingFace: POST {baseUrl}/{modelId} { inputs: text } → binary +async function huggingface({ baseUrl, apiKey, text, modelId }) { + if (!modelId || modelId.includes("..")) throw new Error("Invalid HuggingFace model ID"); + const res = await fetch(`${baseUrl}/${modelId}`, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, + body: JSON.stringify({ inputs: text }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "wav"); +} + +// Inworld: Basic auth, JSON { audioContent } +async function inworld({ baseUrl, apiKey, text, modelId, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Basic ${apiKey}` }, + body: JSON.stringify({ + text, + voiceId: voiceId || "Alex", + modelId: modelId || "inworld-tts-1.5-mini", + audioConfig: { audioEncoding: "MP3" }, + }), + }); + if (!res.ok) await throwUpstreamError(res); + const data = await res.json(); + if (!data.audioContent) throw new Error("Inworld TTS returned no audio"); + return { base64: data.audioContent, format: "mp3" }; +} + +// Cartesia: X-API-Key header +async function cartesia({ baseUrl, apiKey, text, modelId, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-API-Key": apiKey, + "Cartesia-Version": "2024-06-10", + }, + body: JSON.stringify({ + model_id: modelId || "sonic-2", + transcript: text, + ...(voiceId ? 
{ voice: { mode: "id", id: voiceId } } : {}), + output_format: { container: "mp3", bit_rate: 128000, sample_rate: 44100 }, + }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "mp3"); +} + +// PlayHT: token format "userId:apiKey", voice = s3 URL +async function playht({ baseUrl, apiKey, text, modelId, voiceId }) { + const [userId, key] = (apiKey || ":").split(":"); + const res = await fetch(baseUrl, { + method: "POST", + headers: { + "Content-Type": "application/json", + "Accept": "audio/mpeg", + "X-USER-ID": userId || "", + "Authorization": `Bearer ${key || apiKey}`, + }, + body: JSON.stringify({ + text, + voice: voiceId || "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json", + voice_engine: modelId || "PlayDialog", + output_format: "mp3", + speed: 1, + }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "mp3"); +} + +// Coqui (local, noAuth): POST { text, speaker_id } → WAV +async function coqui({ baseUrl, text, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text, ...(voiceId ? { speaker_id: voiceId } : {}) }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "wav"); +} + +// Tortoise (local, noAuth) +async function tortoise({ baseUrl, text, voiceId }) { + const res = await fetch(baseUrl, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text, voice: voiceId || "random" }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "wav"); +} + +// OpenAI-compatible upstream (qwen3-tts, etc.) +async function openaiCompat({ baseUrl, apiKey, text, modelId, voiceId }) { + const headers = { "Content-Type": "application/json" }; + if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`; + const res = await fetch(baseUrl, { + method: "POST", + headers, + body: JSON.stringify({ + model: modelId, + input: text, + voice: voiceId || "alloy", + response_format: "mp3", + speed: 1.0, + }), + }); + if (!res.ok) await throwUpstreamError(res); + return responseToBase64(res, "mp3"); +} + +// format → handler dispatcher +export const FORMAT_HANDLERS = { + hyperbolic, + deepgram, + "nvidia-tts": nvidia, + "huggingface-tts": huggingface, + inworld, + cartesia, + playht, + coqui, + tortoise, + openai: openaiCompat, +}; diff --git a/open-sse/handlers/ttsProviders/googleTts.js b/open-sse/handlers/ttsProviders/googleTts.js new file mode 100644 index 00000000..b77f6d76 --- /dev/null +++ b/open-sse/handlers/ttsProviders/googleTts.js @@ -0,0 +1,54 @@ +// Google Translate TTS (no auth) — scrape token + batchexecute RPC +import { UA } from "./_base.js"; + +const REFRESH_MS = 11 * 60 * 1000; +const cache = { token: null, tokenTime: 0 }; +let _idx = 0; + +async function getToken() { + const now = Date.now(); + if (cache.token && now - cache.tokenTime < REFRESH_MS) return cache.token; + const res = await fetch("https://translate.google.com/", { headers: { "User-Agent": UA } }); + if (!res.ok) throw new Error(`Google translate fetch failed: ${res.status}`); + const html = await res.text(); + const fSid = html.match(/"FdrFJe":"(.*?)"/)?.[1]; + const bl = html.match(/"cfb2h":"(.*?)"/)?.[1]; + if (!fSid || !bl) throw new Error("Failed to parse Google token"); + cache.token = { "f.sid": fSid, bl }; + cache.tokenTime = now; + return cache.token; +} + +export default { + noAuth: true, + async synthesize(text, model) { 
+ const lang = model || "en"; + const token = await getToken(); + const cleanText = text.replace(/[@^*()\\/\-_+=><"'\u201c\u201d\u3010\u3011]/g, " ").replaceAll(", ", ". "); + const rpcId = "jQ1olc"; + const reqId = (++_idx * 100000) + Math.floor(1000 + Math.random() * 9000); + const query = new URLSearchParams({ + rpcids: rpcId, + "f.sid": token["f.sid"], + bl: token.bl, + hl: lang, + "soc-app": 1, "soc-platform": 1, "soc-device": 1, + _reqid: reqId, + rt: "c", + }); + const payload = [cleanText, lang, null, "undefined", [0]]; + const body = new URLSearchParams(); + body.append("f.req", JSON.stringify([[[rpcId, JSON.stringify(payload), null, "generic"]]])); + const res = await fetch(`https://translate.google.com/_/TranslateWebserverUi/data/batchexecute?${query}`, { + method: "POST", + headers: { "Content-Type": "application/x-www-form-urlencoded", "Referer": "https://translate.google.com/" }, + body: body.toString(), + }); + if (!res.ok) throw new Error(`Google TTS failed: ${res.status}`); + const data = await res.text(); + const split = JSON.parse(data.split("\n")[3]); + const base64 = JSON.parse(split[0][2])[0]; + if (!base64 || base64.length < 100) throw new Error("Google TTS returned empty audio"); + return { base64, format: "mp3" }; + }, +}; diff --git a/open-sse/handlers/ttsProviders/index.js b/open-sse/handlers/ttsProviders/index.js new file mode 100644 index 00000000..02345a88 --- /dev/null +++ b/open-sse/handlers/ttsProviders/index.js @@ -0,0 +1,47 @@ +// TTS provider registry +import googleTts from "./googleTts.js"; +import edgeTts, { fetchEdgeTtsVoices } from "./edgeTts.js"; +import localDevice, { fetchLocalDeviceVoices } from "./localDevice.js"; +import elevenlabs, { fetchElevenLabsVoices } from "./elevenlabs.js"; +import openai from "./openai.js"; +import openrouter from "./openrouter.js"; +import { FORMAT_HANDLERS } from "./genericFormats.js"; +import { parseModelVoice } from "./_base.js"; + +// Special providers with custom synthesize() logic +const SPECIAL_ADAPTERS = { + "google-tts": googleTts, + "edge-tts": edgeTts, + "local-device": localDevice, + elevenlabs, + openai, + openrouter, +}; + +export function getTtsAdapter(provider) { + return SPECIAL_ADAPTERS[provider] || null; +} + +// Generic config-driven dispatcher (uses ttsConfig.format) +export async function synthesizeViaConfig(provider, text, model, credentials) { + const { AI_PROVIDERS } = await import("@/shared/constants/providers"); + const cfg = AI_PROVIDERS[provider]?.ttsConfig; + if (!cfg) return null; + const handler = FORMAT_HANDLERS[cfg.format]; + if (!handler) return null; + const apiKey = credentials?.apiKey; + if (cfg.authType !== "none" && !apiKey) throw new Error(`${provider} API key required`); + const defaultModel = cfg.models?.[0]?.id || ""; + const { modelId, voiceId } = parseModelVoice(model, defaultModel, "", cfg.models || []); + return handler({ baseUrl: cfg.baseUrl, apiKey, text, modelId, voiceId }); +} + +// Voice fetchers (used by /api/media-providers/tts/voices route) +export const VOICE_FETCHERS = { + "edge-tts": fetchEdgeTtsVoices, + "local-device": fetchLocalDeviceVoices, + elevenlabs: fetchElevenLabsVoices, +}; + +// Re-export for backward compat +export { fetchEdgeTtsVoices, fetchLocalDeviceVoices, fetchElevenLabsVoices }; diff --git a/open-sse/handlers/ttsProviders/localDevice.js b/open-sse/handlers/ttsProviders/localDevice.js new file mode 100644 index 00000000..74c5930f --- /dev/null +++ b/open-sse/handlers/ttsProviders/localDevice.js @@ -0,0 +1,87 @@ +// Local device TTS — macOS 
`say` + Windows SAPI + ffmpeg +import { execFile } from "node:child_process"; +import { promisify } from "node:util"; +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +const execFileAsync = promisify(execFile); + +let _voicesCache = null; + +async function fetchVoicesMac() { + const { stdout } = await execFileAsync("say", ["-v", "?"]); + const voices = []; + for (const line of stdout.split("\n")) { + const m = line.match(/^([^\s].*?)\s{2,}([a-z]{2}_[A-Z]{2})/); + if (!m) continue; + const name = m[1].trim(); + const locale = m[2].trim(); + const lang = locale.split("_")[0]; + const country = locale.split("_")[1]; + voices.push({ id: name, name, locale, lang, country, gender: "" }); + } + return voices; +} + +async function fetchVoicesWin() { + const script = [ + "Add-Type -AssemblyName System.Speech;", + "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer;", + "$s.GetInstalledVoices() | ForEach-Object { $v = $_.VoiceInfo;", + "[PSCustomObject]@{ Name=$v.Name; Culture=$v.Culture.Name; Gender=$v.Gender } }", + "| ConvertTo-Json -Compress", + ].join(" "); + const { stdout } = await execFileAsync( + "powershell.exe", + ["-NoProfile", "-NonInteractive", "-WindowStyle", "Hidden", "-Command", script], + { windowsHide: true } + ); + const raw = JSON.parse(stdout.trim() || "[]"); + const list = Array.isArray(raw) ? raw : [raw]; + return list.map((v) => { + const culture = v.Culture || "en-US"; + const [lang, country = ""] = culture.split("-"); + const genderMap = { 1: "Male", 2: "Female", Male: "Male", Female: "Female" }; + return { + id: v.Name, name: v.Name, + locale: culture.replace("-", "_"), + lang, country, + gender: genderMap[v.Gender] || "", + }; + }); +} + +export async function fetchLocalDeviceVoices() { + if (_voicesCache) return _voicesCache; + try { + const voices = process.platform === "win32" ? await fetchVoicesWin() : await fetchVoicesMac(); + _voicesCache = voices; + return voices; + } catch { + return []; + } +} + +// NOTE: synthesis currently shells out to macOS `say` only; Windows SAPI above is used just for voice listing. +async function synthesizeMac(text, voiceId) { + const dir = await mkdtemp(join(tmpdir(), "tts-")); + const aiffPath = join(dir, "out.aiff"); + const mp3Path = join(dir, "out.mp3"); + try { + const args = voiceId ? ["-v", voiceId, "-o", aiffPath, text] : ["-o", aiffPath, text]; + await execFileAsync("say", args); + await execFileAsync("ffmpeg", ["-y", "-i", aiffPath, "-codec:a", "libmp3lame", "-qscale:a", "4", mp3Path]); + const buf = await readFile(mp3Path); + return buf.toString("base64"); + } finally { + await rm(dir, { recursive: true, force: true }); + } +} + +export default { + noAuth: true, + async synthesize(text, model) { + const base64 = await synthesizeMac(text, model); + return { base64, format: "mp3" }; + }, +}; diff --git a/open-sse/handlers/ttsProviders/openai.js b/open-sse/handlers/ttsProviders/openai.js new file mode 100644 index 00000000..6a19f342 --- /dev/null +++ b/open-sse/handlers/ttsProviders/openai.js @@ -0,0 +1,30 @@ +// OpenAI TTS — model format: "tts-model/voice" +import { Buffer } from "node:buffer"; + +export default { + async synthesize(text, model, credentials) { + if (!credentials?.apiKey) throw new Error("No OpenAI API key configured"); + + let ttsModel = "gpt-4o-mini-tts"; + let voice = "alloy"; + if (model && model.includes("/")) { + const parts = model.split("/"); + if (parts.length === 2) [ttsModel, voice] = parts; + } else if (model) { + voice = model; + } + + const baseUrl = (credentials.baseUrl || "https://api.openai.com").replace(/\/+$/, ""); + const res = await fetch(`${baseUrl}/v1/audio/speech`, { + method: "POST", + headers: { "Content-Type": "application/json", "Authorization": `Bearer ${credentials.apiKey}` }, + body: JSON.stringify({ model: ttsModel, voice, input: text }), + }); + if (!res.ok) { + const err = await res.json().catch(() => ({})); + throw new Error(err?.error?.message || `OpenAI TTS failed: ${res.status}`); + } + const buf = await res.arrayBuffer(); + return { base64: Buffer.from(buf).toString("base64"), format: "mp3" }; + }, +}; diff --git a/open-sse/handlers/ttsProviders/openrouter.js b/open-sse/handlers/ttsProviders/openrouter.js new file mode 100644 index 00000000..0b2d932b --- /dev/null +++ b/open-sse/handlers/ttsProviders/openrouter.js @@ -0,0 +1,70 @@ +// OpenRouter TTS — via chat completions + audio modality (SSE stream) +export default { + async synthesize(text, model, credentials) { + if (!credentials?.apiKey) throw new Error("No OpenRouter API key configured"); + + // model format: "tts-model/voice" e.g.
"openai/gpt-4o-mini-tts/alloy" + let ttsModel = "openai/gpt-4o-mini-tts"; + let voice = "alloy"; + if (model && model.includes("/")) { + const lastSlash = model.lastIndexOf("/"); + const maybVoice = model.slice(lastSlash + 1); + const maybeModel = model.slice(0, lastSlash); + if (maybeModel.includes("/")) { + ttsModel = maybeModel; + voice = maybVoice; + } else { + voice = model; + } + } else if (model) { + voice = model; + } + + const res = await fetch("https://openrouter.ai/api/v1/chat/completions", { + method: "POST", + headers: { + "Content-Type": "application/json", + "Authorization": `Bearer ${credentials.apiKey}`, + "HTTP-Referer": "https://endpoint-proxy.local", + "X-Title": "Endpoint Proxy", + }, + body: JSON.stringify({ + model: ttsModel, + modalities: ["text", "audio"], + audio: { voice, format: "wav" }, + stream: true, + messages: [{ role: "user", content: text }], + }), + }); + + if (!res.ok) { + const err = await res.json().catch(() => ({})); + throw new Error(err?.error?.message || `OpenRouter TTS failed: ${res.status}`); + } + + // Parse SSE stream, accumulate base64 audio chunks + const chunks = []; + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop(); + for (const line of lines) { + if (!line.startsWith("data: ") || line === "data: [DONE]") continue; + try { + const json = JSON.parse(line.slice(6)); + const audioData = json.choices?.[0]?.delta?.audio?.data; + if (audioData) chunks.push(audioData); + } catch {} + } + } + + if (chunks.length === 0) throw new Error("OpenRouter TTS returned no audio data"); + return { base64: chunks.join(""), format: "wav" }; + }, +}; diff --git a/open-sse/services/model.js b/open-sse/services/model.js index c5ebcff3..c127fe7f 100644 --- a/open-sse/services/model.js +++ b/open-sse/services/model.js @@ -67,6 +67,23 @@ const ALIAS_TO_PROVIDER_ID = { "xiaomi-mimo": "xiaomi-mimo", cf: "cloudflare-ai", "cloudflare-ai": "cloudflare-ai", + // Image/video providers + fal: "fal-ai", + "fal-ai": "fal-ai", + stability: "stability-ai", + "stability-ai": "stability-ai", + bfl: "black-forest-labs", + "black-forest-labs": "black-forest-labs", + recraft: "recraft", + topaz: "topaz", + runway: "runwayml", + runwayml: "runwayml", + // Embedding/rerank + jina: "jina-ai", + "jina-ai": "jina-ai", + // TTS + polly: "aws-polly", + "aws-polly": "aws-polly", }; /** diff --git a/package.json b/package.json index d7af04cf..3ad80b90 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "9router-app", - "version": "0.4.14", + "version": "0.4.15", "description": "9Router web dashboard", "private": true, "scripts": { diff --git a/skills/9router-chat/SKILL.md b/skills/9router-chat/SKILL.md new file mode 100644 index 00000000..2c826da3 --- /dev/null +++ b/skills/9router-chat/SKILL.md @@ -0,0 +1,71 @@ +--- +name: 9router-chat +description: Chat / code generation via 9Router using OpenAI /v1/chat/completions or Anthropic /v1/messages format with streaming + auto-fallback combos. Use when the user wants to ask an LLM, generate code, summarize text, or run prompts through 9Router. +--- + +# 9Router — Chat + +Requires `NINEROUTER_URL` (and `NINEROUTER_KEY` if auth enabled). See https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router/SKILL.md for setup. 
+ +## Endpoints + +- `POST $NINEROUTER_URL/v1/chat/completions` — OpenAI format +- `POST $NINEROUTER_URL/v1/messages` — Anthropic format + +## Discover models + +```bash +curl $NINEROUTER_URL/v1/models | jq '.data[].id' +``` + +Combos (e.g. `vip`, `mycodex`) auto-fallback through multiple providers. + +## OpenAI format + +```bash +curl -X POST $NINEROUTER_URL/v1/chat/completions \ + -H "Authorization: Bearer $NINEROUTER_KEY" \ + -H "Content-Type: application/json" \ + -d '{"model":"openai/gpt-5","messages":[{"role":"user","content":"Hi"}],"stream":false}' +``` + +JS (OpenAI SDK): + +```js +import OpenAI from "openai"; +const client = new OpenAI({ baseURL: `${process.env.NINEROUTER_URL}/v1`, apiKey: process.env.NINEROUTER_KEY }); +const res = await client.chat.completions.create({ + model: "openai/gpt-5", + messages: [{ role: "user", content: "Hi" }], + stream: true, +}); +for await (const chunk of res) process.stdout.write(chunk.choices[0]?.delta?.content || ""); +``` + +## Anthropic format + +```bash +curl -X POST $NINEROUTER_URL/v1/messages \ + -H "Authorization: Bearer $NINEROUTER_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -H "Content-Type: application/json" \ + -d '{"model":"cc/claude-opus-4-7","max_tokens":1024,"messages":[{"role":"user","content":"Hi"}]}' +``` + +## Response shape + +OpenAI (`/v1/chat/completions`): +```json +{ "id": "chatcmpl-...", "object": "chat.completion", "model": "openai/gpt-5", + "choices": [{ "index": 0, "message": { "role": "assistant", "content": "Hello!" }, "finish_reason": "stop" }], + "usage": { "prompt_tokens": 8, "completion_tokens": 2, "total_tokens": 10 } } +``` + +Streaming (`stream:true`) emits SSE: `data: {choices:[{delta:{content:"..."}}]}\n\n` ... `data: [DONE]\n\n`. + +Anthropic (`/v1/messages`): +```json +{ "id": "msg_...", "type": "message", "role": "assistant", "model": "cc/claude-opus-4-7", + "content": [{ "type": "text", "text": "Hello!" }], + "stop_reason": "end_turn", "usage": { "input_tokens": 8, "output_tokens": 2 } } +``` diff --git a/skills/9router-embeddings/SKILL.md b/skills/9router-embeddings/SKILL.md new file mode 100644 index 00000000..13ed14ae --- /dev/null +++ b/skills/9router-embeddings/SKILL.md @@ -0,0 +1,67 @@ +--- +name: 9router-embeddings +description: Generate vector embeddings via 9Router /v1/embeddings using OpenAI / Gemini / Mistral / Voyage / Nvidia / GitHub embedding models for RAG, semantic search, similarity. Use when the user wants embeddings, vectors, RAG, semantic search, or to embed text. +--- + +# 9Router — Embeddings + +Requires `NINEROUTER_URL` (and `NINEROUTER_KEY` if auth enabled). See https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router/SKILL.md for setup. 
+
+## Discover models
+
+```bash
+curl $NINEROUTER_URL/v1/models/embedding | jq '.data[].id'
+```
+
+## Endpoint
+
+`POST $NINEROUTER_URL/v1/embeddings`
+
+| Field | Required | Notes |
+|---|---|---|
+| `model` | yes | from `/v1/models/embedding` |
+| `input` | yes | string OR array of strings |
+| `encoding_format` | no | `float` (default) / `base64` |
+| `dimensions` | no | OpenAI v3 only |
+
+## Examples
+
+```bash
+curl -X POST $NINEROUTER_URL/v1/embeddings \
+  -H "Authorization: Bearer $NINEROUTER_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"openai/text-embedding-3-small","input":["hello","world"]}'
+```
+
+JS:
+
+```js
+const r = await fetch(`${process.env.NINEROUTER_URL}/v1/embeddings`, {
+  method: "POST",
+  headers: { "Authorization": `Bearer ${process.env.NINEROUTER_KEY}`, "Content-Type": "application/json" },
+  body: JSON.stringify({ model: "gemini/text-embedding-004", input: "RAG chunk text" }),
+});
+const { data } = await r.json();
+console.log(data[0].embedding.length); // dimension
+```
+
+## Response shape
+
+```json
+{ "object": "list", "model": "openai/text-embedding-3-small",
+  "data": [
+    { "object": "embedding", "index": 0, "embedding": [0.0123, -0.045, ...] },
+    { "object": "embedding", "index": 1, "embedding": [...] }
+  ],
+  "usage": { "prompt_tokens": 5, "total_tokens": 5 } }
+```
+
+## Provider quirks
+
+| Provider | Notes |
+|---|---|
+| `openai`, `openrouter`, `mistral`, `voyage-ai`, `fireworks`, `together`, `nebius`, `github`, `nvidia`, `jina-ai` | Native OpenAI shape — `dimensions` works only on OpenAI v3 (`text-embedding-3-*`) |
+| `gemini`, `google_ai_studio` | Server auto-converts to `embedContent`/`batchEmbedContents` — send OpenAI shape |
+| `openai-compatible-*`, `custom-embedding-*` | Custom `baseUrl` from credentials |
+
+Batch (`input` as array) is faster; some providers cap batch size.
diff --git a/skills/9router-image/SKILL.md b/skills/9router-image/SKILL.md
new file mode 100644
index 00000000..48cb8c7e
--- /dev/null
+++ b/skills/9router-image/SKILL.md
@@ -0,0 +1,84 @@
+---
+name: 9router-image
+description: Generate images via 9Router /v1/images/generations using OpenAI / Gemini Imagen / DALL-E / FLUX / MiniMax / SDWebUI / ComfyUI / Codex models. Use when the user wants to create, generate, draw, or render an image, picture, or text-to-image (txt2img).
+---
+
+# 9Router — Image Generation
+
+Requires `NINEROUTER_URL` (and `NINEROUTER_KEY` if auth enabled). See https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router/SKILL.md for setup.
+
+## Discover models
+
+```bash
+curl $NINEROUTER_URL/v1/models/image | jq '.data[].id'
+```
+
+## Endpoint
+
+`POST $NINEROUTER_URL/v1/images/generations`
+
+| Field | Required | Notes |
+|---|---|---|
+| `model` | yes | from `/v1/models/image` |
+| `prompt` | yes | image description |
+| `n` | no | count (provider-dependent) |
+| `size` | no | `1024x1024`, `1792x1024`, ... |
+| `quality` | no | `standard` / `hd` (OpenAI) |
+| `response_format` | no | `url` (default) or `b64_json` |
+
+Add query `?response_format=binary` to receive raw image bytes (handy for saving a file).
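+
+With `response_format=b64_json` you decode the payload yourself; a minimal Node sketch (the `saveB64Image` name and `out.png` path are illustrative), matching the response shape documented below:
+
+```js
+import { writeFile } from "node:fs/promises";
+
+// `data` is the array from a b64_json response: [{ b64_json: "..." }]
+async function saveB64Image(data, path = "out.png") {
+  await writeFile(path, Buffer.from(data[0].b64_json, "base64"));
+}
+```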
+
+## Examples
+
+Save to file (binary):
+
+```bash
+curl -X POST "$NINEROUTER_URL/v1/images/generations?response_format=binary" \
+  -H "Authorization: Bearer $NINEROUTER_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"gemini/gemini-3-pro-image-preview","prompt":"watercolor mountains at sunrise","size":"1024x1024"}' \
+  --output out.png
+```
+
+JS (URL response):
+
+```js
+const r = await fetch(`${process.env.NINEROUTER_URL}/v1/images/generations`, {
+  method: "POST",
+  headers: { "Authorization": `Bearer ${process.env.NINEROUTER_KEY}`, "Content-Type": "application/json" },
+  body: JSON.stringify({ model: "gemini/gemini-3-pro-image-preview", prompt: "neon city", size: "1024x1024" }),
+});
+const { data } = await r.json();
+console.log(data[0].url || data[0].b64_json.slice(0, 40));
+```
+
+## Response shape
+
+JSON (default `response_format=url`):
+```json
+{ "created": 1735000000, "data": [{ "url": "https://..." }] }
+```
+
+`response_format=b64_json`:
+```json
+{ "created": 1735000000, "data": [{ "b64_json": "iVBORw0KGgo..." }] }
+```
+
+Query `?response_format=binary` returns raw image bytes (Content-Type `image/png` or `image/jpeg`).
+
+## Provider quirks
+
+Common fields above work everywhere. These add/override:
+
+| Provider | Extra/changed fields | Notes |
+|---|---|---|
+| `openai`, `minimax`, `openrouter`, `recraft` | `quality`, `style`, `response_format` | Standard OpenAI shape |
+| `gemini` (nano-banana) | — | Only `prompt`; ignores `size`/`n` |
+| `codex` (gpt-5.4-image) | `image`, `images[]`, `image_detail`, `output_format`, `background` | SSE stream; **ChatGPT Plus/Pro required** |
+| `huggingface` | — | Only `prompt`; returns single image |
+| `nanobanana` | `image`, `images[]` (edit mode) | `size` → aspect ratio; async polling |
+| `fal-ai` | `image` (img2img) | `n` → `num_images`; `size` → ratio; async |
+| `stability-ai` | `style` (preset), `output_format` | `size` → `aspect_ratio` |
+| `black-forest-labs` (FLUX) | `image` (ref) | `size` → exact `width`/`height`; async |
+| `runwayml` | `image` (ref) | `size` → ratio; async; video models exist |
+| `sdwebui`, `comfyui` | — | Localhost noAuth (`:7860` / `:8188`) |
diff --git a/skills/9router-tts/SKILL.md b/skills/9router-tts/SKILL.md
new file mode 100644
index 00000000..766f3698
--- /dev/null
+++ b/skills/9router-tts/SKILL.md
@@ -0,0 +1,75 @@
+---
+name: 9router-tts
+description: Text-to-speech via 9Router /v1/audio/speech using OpenAI / ElevenLabs / Deepgram / Edge TTS / Google TTS / Hyperbolic / Inworld voices. Use when the user wants to convert text to speech, generate audio, voiceover, narrate, or read text aloud.
+---
+
+# 9Router — Text-to-Speech
+
+Requires `NINEROUTER_URL` (and `NINEROUTER_KEY` if auth enabled). See https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router/SKILL.md for setup.
+
+## Discover voices
+
+```bash
+curl $NINEROUTER_URL/v1/models/tts | jq '.data[].id'
+```
+
+`model` = voice ID (e.g. `openai/tts-1`, `el/eleven_multilingual_v2`, `edge-tts/en-US-AriaNeural`).
+
+## Endpoint
+
+`POST $NINEROUTER_URL/v1/audio/speech`
+
+| Field | Required | Notes |
+|---|---|---|
+| `model` | yes | voice ID from `/v1/models/tts` |
+| `input` | yes | text to speak |
+
+Query `?response_format=mp3` (default, raw bytes) or `?response_format=json` (`{audio: base64, format}`).
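+
+If you opt into `?response_format=json`, the audio arrives base64-encoded; a minimal Node sketch for decoding it to disk (the `saveTtsAudio` name is illustrative; field names per the response shape below):
+
+```js
+import { writeFile } from "node:fs/promises";
+
+// `body` is the parsed JSON: { audio: "<base64>", format: "mp3" }
+async function saveTtsAudio(body) {
+  await writeFile(`speech.${body.format}`, Buffer.from(body.audio, "base64"));
+}
+```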
+
+## Examples
+
+Save MP3:
+
+```bash
+curl -X POST "$NINEROUTER_URL/v1/audio/speech" \
+  -H "Authorization: Bearer $NINEROUTER_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"openai/tts-1","input":"Hello world"}' \
+  --output speech.mp3
+```
+
+JS (save file):
+
+```js
+import { writeFile } from "node:fs/promises";
+const r = await fetch(`${process.env.NINEROUTER_URL}/v1/audio/speech`, {
+  method: "POST",
+  headers: { "Authorization": `Bearer ${process.env.NINEROUTER_KEY}`, "Content-Type": "application/json" },
+  body: JSON.stringify({ model: "el/eleven_multilingual_v2", input: "Xin chào" }),
+});
+await writeFile("speech.mp3", Buffer.from(await r.arrayBuffer()));
+```
+
+## Response shape
+
+Default → raw audio bytes (Content-Type `audio/mp3`).
+
+`?response_format=json`:
+```json
+{ "audio": "SUQzBAAAA...", "format": "mp3" }
+```
+
+## Provider quirks (model format)
+
+| Provider | `model` format | Notes |
+|---|---|---|
+| `openai` | `tts-1/alloy` (model/voice) or just voice | Default model `gpt-4o-mini-tts` |
+| `elevenlabs` | `model/voice` or just `voice` | Default model `eleven_flash_v2_5`; list voices in Dashboard |
+| `openrouter` | `openai/gpt-4o-mini-tts/alloy` | Streamed via chat-completions audio modality |
+| `edge-tts` | voice id e.g. `vi-VN-HoaiMyNeural` | **noAuth**; default `vi-VN-HoaiMyNeural` |
+| `google-tts` | language code e.g. `en`, `vi` | **noAuth** |
+| `local-device` | OS voice name (`say -v ?` / SAPI) | **noAuth**; needs `ffmpeg` |
+| `deepgram` | `aura-asteria-en` etc. | Token auth |
+| `nvidia`, `inworld`, `cartesia`, `playht` | `model/voice` | Provider-specific auth header |
+| `coqui`, `tortoise` | speaker / voice id | Localhost noAuth |
+| `hyperbolic` | model id | Body = `{text}` only |
diff --git a/skills/9router-web-fetch/SKILL.md b/skills/9router-web-fetch/SKILL.md
new file mode 100644
index 00000000..19a0693a
--- /dev/null
+++ b/skills/9router-web-fetch/SKILL.md
@@ -0,0 +1,71 @@
+---
+name: 9router-web-fetch
+description: Fetch URL → markdown / text / HTML via 9Router /v1/web/fetch using Firecrawl / Jina Reader / Tavily Extract / Exa Contents. Use when the user wants to scrape a webpage, extract URL content, read article, or convert a URL to markdown.
+---
+
+# 9Router — Web Fetch
+
+Requires `NINEROUTER_URL` (and `NINEROUTER_KEY` if auth enabled). See https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router/SKILL.md for setup.
+
+## Discover providers
+
+```bash
+curl $NINEROUTER_URL/v1/models/web | jq '.data[] | select(.kind=="webFetch") | .id'
+```
+
+IDs end in `/fetch` (e.g. `firecrawl/fetch`, `jina/fetch`). `fetch-combo` chains providers with auto-fallback.
+
+## Endpoint
+
+`POST $NINEROUTER_URL/v1/web/fetch`
+
+| Field | Required | Notes |
+|---|---|---|
+| `model` (or `provider`) | yes | from `/v1/models/web` (`firecrawl/fetch` or `firecrawl`) |
+| `url` | yes | URL to extract |
+| `format` | no | `markdown` (default) / `text` / `html` |
+| `max_characters` | no | truncate output |
+
+## Examples
+
+```bash
+curl -X POST $NINEROUTER_URL/v1/web/fetch \
+  -H "Authorization: Bearer $NINEROUTER_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"jina/fetch","url":"https://9router.com","format":"markdown"}'
+```
+
+JS:
+
+```js
+const r = await fetch(`${process.env.NINEROUTER_URL}/v1/web/fetch`, {
+  method: "POST",
+  headers: { "Authorization": `Bearer ${process.env.NINEROUTER_KEY}`, "Content-Type": "application/json" },
+  body: JSON.stringify({ model: "fetch-combo", url: "https://example.com", format: "markdown", max_characters: 5000 }),
+});
+const doc = await r.json();
+console.log(doc.title, doc.content.length);
+```
+
+## Response shape
+
+```json
+{
+  "provider": "jina-reader",
+  "url": "...",
+  "title": "...",
+  "content": { "format": "markdown", "text": "...", "length": 1234 },
+  "metadata": { "author": null, "published_at": null, "language": null },
+  "usage": { "fetch_cost_usd": 0 },
+  "metrics": { "response_time_ms": 850, "upstream_latency_ms": 700 }
+}
+```
+
+## Provider quirks
+
+| Provider | Auth | Best for |
+|---|---|---|
+| `firecrawl` | Bearer | JS-rendered pages, `format=markdown/html` |
+| `jina-reader` | Bearer (optional) | Free tier (~1M chars/mo); fastest plain markdown |
+| `tavily` | Bearer | Bulk extract; returns `raw_content` |
+| `exa` | `x-api-key` | Pre-indexed pages; fast text extraction |
diff --git a/skills/9router-web-search/SKILL.md b/skills/9router-web-search/SKILL.md
new file mode 100644
index 00000000..3a6e9b12
--- /dev/null
+++ b/skills/9router-web-search/SKILL.md
@@ -0,0 +1,89 @@
+---
+name: 9router-web-search
+description: Web search via 9Router /v1/search using Tavily / Exa / Brave / Serper / SearXNG / Google PSE / Linkup / SearchAPI / You.com / Perplexity. Use when the user wants to search the web, look up information, find articles, or query a search engine.
+---
+
+# 9Router — Web Search
+
+Requires `NINEROUTER_URL` (and `NINEROUTER_KEY` if auth enabled). See https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router/SKILL.md for setup.
+
+## Discover providers
+
+```bash
+curl $NINEROUTER_URL/v1/models/web | jq '.data[] | select(.kind=="webSearch") | .id'
+```
+
+IDs end in `/search` (e.g. `tavily/search`). Combos (`owned_by:"combo"`) chain providers with auto-fallback.
+
+## Endpoint
+
+`POST $NINEROUTER_URL/v1/search`
+
+| Field | Required | Notes |
+|---|---|---|
+| `model` (or `provider`) | yes | from `/v1/models/web` (e.g. `tavily/search` or just `tavily`) |
+| `query` | yes | search query |
+| `max_results` | no | default 5 |
+| `search_type` | no | `web` (default) / `news` |
+| `country`, `language`, `time_range`, `domain_filter` | no | provider-dependent |
+
+## Examples
+
+```bash
+curl -X POST $NINEROUTER_URL/v1/search \
+  -H "Authorization: Bearer $NINEROUTER_KEY" \
+  -H "Content-Type: application/json" \
+  -d '{"model":"tavily/search","query":"9Router open source","max_results":5}'
+```
+
+JS:
+
+```js
+const r = await fetch(`${process.env.NINEROUTER_URL}/v1/search`, {
+  method: "POST",
+  headers: { "Authorization": `Bearer ${process.env.NINEROUTER_KEY}`, "Content-Type": "application/json" },
+  body: JSON.stringify({ model: "search-combo", query: "latest LLM benchmarks", max_results: 10 }),
+});
+console.log(await r.json());
+```
+
+## Response shape
+
+```json
+{
+  "provider": "tavily",
+  "query": "9Router open source",
+  "results": [
+    {
+      "title": "...", "url": "https://...", "display_url": "github.com/...",
+      "snippet": "...", "position": 1, "score": 0.92,
+      "published_at": null, "favicon_url": null, "content": null,
+      "metadata": { "author": null, "language": null, "source_type": null, "image_url": null },
+      "citation": { "provider": "tavily", "retrieved_at": "2026-...", "rank": 1 }
+    }
+  ],
+  "answer": null,
+  "usage": { "queries_used": 1, "search_cost_usd": 0.008 },
+  "metrics": { "response_time_ms": 850, "upstream_latency_ms": 700, "total_results_available": 12 },
+  "errors": []
+}
+```
+
+## Provider quirks
+
+All accept `query` + `max_results`. Optional fields vary:
+
+| Provider | Supports | Required extras |
+|---|---|---|
+| `tavily` | country, domain_filter, news topic | — |
+| `exa` | domain_filter (incl/excl), news category | — |
+| `brave-search` | country, language | — |
+| `serper` | country, language, news endpoint | — |
+| `perplexity` | country, language, domain_filter | — |
+| `linkup` | domain_filter, time_range | `depth: fast/standard/deep` (optional) |
+| `google-pse` | country, language, time_range, offset | **`cx` required** (providerOptions) |
+| `searchapi` | country, language, pagination | — |
+| `youcom` | country, language, time_range, domain_filter, full_page | — |
+| `searxng` | language, time_range | Self-hosted, **noAuth** |
+
+Provider IS the model — `"provider":"tavily"` ≡ `"model":"tavily/search"`.
diff --git a/skills/9router/SKILL.md b/skills/9router/SKILL.md
new file mode 100644
index 00000000..a872a8f8
--- /dev/null
+++ b/skills/9router/SKILL.md
@@ -0,0 +1,60 @@
+---
+name: 9router
+description: Entry point for 9Router — local/remote AI gateway with OpenAI-compatible REST for chat, image, TTS, embeddings, web search, web fetch. Use when the user mentions 9Router, NINEROUTER_URL, or wants AI without writing provider boilerplate. This skill covers setup + indexes capability skills; fetch the relevant capability SKILL.md from the URLs below when needed.
+---
+
+# 9Router
+
+Local/remote AI gateway exposing OpenAI-compatible REST. One key, many providers, auto-fallback.
+
+## Setup
+
+```bash
+export NINEROUTER_URL="http://localhost:20128"  # or VPS / tunnel URL
+export NINEROUTER_KEY="sk-..."                  # from Dashboard → Keys (only if requireApiKey=true)
+```
+
+All requests: `${NINEROUTER_URL}/v1/...` with header `Authorization: Bearer ${NINEROUTER_KEY}` (omit if auth disabled).
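+
+In scripts it helps to attach that header only when a key is actually set; a minimal sketch:
+
+```js
+// Spread in the Authorization header only when NINEROUTER_KEY exists (auth may be disabled)
+const headers = {
+  "Content-Type": "application/json",
+  ...(process.env.NINEROUTER_KEY
+    ? { Authorization: `Bearer ${process.env.NINEROUTER_KEY}` }
+    : {}),
+};
+```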
+
+Verify: `curl $NINEROUTER_URL/api/health` → `{"ok":true}`
+
+## Discover models
+
+```bash
+curl $NINEROUTER_URL/v1/models                 # chat/LLM (default)
+curl $NINEROUTER_URL/v1/models/image           # image-gen
+curl $NINEROUTER_URL/v1/models/tts             # text-to-speech
+curl $NINEROUTER_URL/v1/models/embedding       # embeddings
+curl $NINEROUTER_URL/v1/models/web             # web search + fetch (entries have `kind` field)
+curl $NINEROUTER_URL/v1/models/stt             # speech-to-text
+curl $NINEROUTER_URL/v1/models/image-to-text   # vision
+```
+
+Use `data[].id` as `model` field in requests. Combos appear with `owned_by:"combo"`.
+
+Response shape:
+```json
+{ "object": "list", "data": [
+  { "id": "openai/gpt-5", "object": "model", "owned_by": "openai", "created": 1735000000 },
+  { "id": "tavily/search", "object": "model", "kind": "webSearch", "owned_by": "tavily", "created": 1735000000 }
+]}
+```
+
+## Capability skills
+
+When the user needs a specific capability, fetch that skill's `SKILL.md` from its raw URL:
+
+| Capability | Raw URL |
+|---|---|
+| Chat / code-gen | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-chat/SKILL.md |
+| Image generation | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-image/SKILL.md |
+| Text-to-speech | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-tts/SKILL.md |
+| Embeddings | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-embeddings/SKILL.md |
+| Web search | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-web-search/SKILL.md |
+| Web fetch (URL → markdown) | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-web-fetch/SKILL.md |
+
+## Errors
+
+- 401 → set/refresh `NINEROUTER_KEY` (Dashboard → Keys)
+- 400 `Invalid model format` → check `model` exists in `/v1/models/`
+- 503 `All accounts unavailable` → wait `retry-after` or add another provider account
diff --git a/skills/README.md b/skills/README.md
new file mode 100644
index 00000000..992800ba
--- /dev/null
+++ b/skills/README.md
@@ -0,0 +1,41 @@
+# 9Router — Agent Skills
+
+Drop-in skills for any AI agent (Claude, Cursor, ChatGPT, custom SDK). Just **copy a link** below and paste it to your AI — it will fetch the skill and use 9Router for you.
+
+> Tip: start with the **9router** entry skill — it covers setup and links to all capability skills.
+
+## Skills
+
+| Capability | Copy link below and paste to your AI |
+|---|---|
+| **Entry / Setup** (start here) | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router/SKILL.md |
+| Chat / code-gen | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-chat/SKILL.md |
+| Image generation | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-image/SKILL.md |
+| Text-to-speech | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-tts/SKILL.md |
+| Embeddings | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-embeddings/SKILL.md |
+| Web search | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-web-search/SKILL.md |
+| Web fetch (URL → markdown) | https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router-web-fetch/SKILL.md |
+
+## How to use
+
+Paste to your AI (Claude, Cursor, ChatGPT, …):
+
+```
+Read this skill and use it: https://raw.githubusercontent.com/decolua/9router/main/app/skills/9router/SKILL.md
+```
+
+Then ask normally — *"generate an image of a cat"*, *"transcribe this URL"*, etc.
+
+## Configure your shell once
+
+```bash
+export NINEROUTER_URL="http://localhost:20128"  # local default, or your VPS / tunnel URL
+export NINEROUTER_KEY="sk-..."                  # from Dashboard → Keys (only if requireApiKey=true)
+```
+
+Verify: `curl $NINEROUTER_URL/api/health` → `{"ok":true}`.
+
+## Links
+
+- Source: https://github.com/decolua/9router
+- Dashboard: https://9router.com
diff --git a/src/app/(dashboard)/dashboard/endpoint/EndpointPageClient.js b/src/app/(dashboard)/dashboard/endpoint/EndpointPageClient.js
index 85a0aca4..7613755c 100644
--- a/src/app/(dashboard)/dashboard/endpoint/EndpointPageClient.js
+++ b/src/app/(dashboard)/dashboard/endpoint/EndpointPageClient.js
@@ -665,7 +665,10 @@ export default function APIPageClient({ machineId }) {
       {/* Endpoint Card */}
-      <h3 ...>API Endpoint</h3>
+      <h3 ...>
+        <span ...>api</span>
+        API Endpoint
+      </h3>

       {/* Endpoint rows */}
@@ -851,7 +854,10 @@ export default function APIPageClient({ machineId }) {
       {/* Token Saver (RTK + Caveman) */}
-      <h3 ...>Token Saver</h3>
+      <h3 ...>
+        <span ...>bolt</span>
+        Token Saver
+      </h3>
@@ -922,7 +928,10 @@ export default function APIPageClient({ machineId }) {
       {/* API Keys */}
-      <h3 ...>API Keys</h3>
+      <h3 ...>
+        <span ...>vpn_key</span>
+        API Keys
+      </h3>
diff --git a/src/app/(dashboard)/dashboard/media-providers/[kind]/[id]/page.js b/src/app/(dashboard)/dashboard/media-providers/[kind]/[id]/page.js
index af52e94c..cded5093 100644
--- a/src/app/(dashboard)/dashboard/media-providers/[kind]/[id]/page.js
+++ b/src/app/(dashboard)/dashboard/media-providers/[kind]/[id]/page.js
@@ -885,6 +885,10 @@ function GenericExampleCard({ providerId, kind }) {
   // Get models for this kind (e.g., type="image")
   const kindModels = getModelsByProviderId(providerId).filter((m) => m.type === kind);

+  // Kinds that need a model identifier in the request (image/video/music/stt/imageToText)
+  const KIND_NEEDS_MODEL = new Set(["image", "video", "music", "stt", "imageToText"]);
+  const needsModel = KIND_NEEDS_MODEL.has(kind);
+  const allowManualModel = needsModel && kindModels.length === 0;
   const [selectedModel, setSelectedModel] = useState(kindModels[0]?.id ?? "");
   const selectedModelObj = kindModels.find((m) => m.id === selectedModel);
   const supportsEdit = !!selectedModelObj?.capabilities?.includes("edit");
@@ -935,8 +939,10 @@ function GenericExampleCard({ providerId, kind }) {
   const endpoint = useTunnel ? tunnelEndpoint : localEndpoint;
   const apiPath = kindConfig.endpoint.path;

-  // For kinds without model concept (webSearch/webFetch), use providerAlias directly
-  const modelFull = kindModels.length === 0 ? providerAlias : (selectedModel ? `${providerAlias}/${selectedModel}` : "");
+  // webSearch/webFetch: use providerAlias only. Other kinds: append model when present.
+  const modelFull = !needsModel
+    ? providerAlias
+    : (selectedModel ? `${providerAlias}/${selectedModel}` : (allowManualModel ? "" : providerAlias));

   // Build request body with optional extra fields (only non-empty values)
   const extraBodyFromFields = Object.entries(extraValues).reduce((acc, [k, v]) => {
@@ -1060,8 +1066,8 @@

         Example

-        {/* Model selector - only show if models available */}
-        {kindModels.length > 0 && (
+        {/* Model selector — dropdown if presets exist, else manual input for media kinds */}
+        {kindModels.length > 0 ? (
           <select ... onChange={(e) => setSelectedModel(e.target.value)}>
             ...
+        ) : allowManualModel ? (
+          <input
+            ...
+            onChange={(e) => setSelectedModel(e.target.value)}
+            placeholder="Enter model id (provider-specific)"
+            className="w-full px-3 py-1.5 text-sm border border-border rounded-lg bg-background focus:outline-none focus:border-primary font-mono"
+          />
+        ) : null}

         {/* Endpoint */}
diff --git a/src/app/(dashboard)/dashboard/skills/page.js b/src/app/(dashboard)/dashboard/skills/page.js
new file mode 100644
index 00000000..ee3f6299
--- /dev/null
+++ b/src/app/(dashboard)/dashboard/skills/page.js
@@ -0,0 +1,112 @@
+"use client";
+
+import { Card, Badge } from "@/shared/components";
+import { useCopyToClipboard } from "@/shared/hooks/useCopyToClipboard";
+import {
+  SKILLS,
+  SKILLS_REPO_URL,
+  getSkillRawUrl,
+  getSkillBlobUrl,
+} from "@/shared/constants/skills";
+
+function CopyButton({ value, label = "Copy link" }) {
+  const { copied, copy } = useCopyToClipboard(2000);
+  return (
+    <button onClick={() => copy(value)} ...>
+      {copied ? "..." : label}
+    </button>
+  );
+}
+
+function SkillRow({ skill }) {
+  const url = getSkillRawUrl(skill.id);
+  return (
+    <Card ...>
+      <span ...>{skill.icon}</span>
+      <div ...>
+        <h3 ...>{skill.name}</h3>
+        {skill.isEntry && (
+          <Badge ...>START HERE</Badge>
+        )}
+        {skill.endpoint && (
+          <Badge ...>{skill.endpoint}</Badge>
+        )}
+      </div>
+      <p ...>{skill.description}</p>
+      <a href={getSkillBlobUrl(skill.id)} ...>
+        {url} <span ...>open_in_new</span>
+      </a>
+      <CopyButton value={url} />
+    </Card>
+  );
+}
+
+export default function SkillsPage() {
+  return (
+    <div ...>
+      <Card ...>
+        Paste this to your AI:
+        <code ...>Read this skill and use it: {getSkillRawUrl("9router")}</code>
+      </Card>
+
+      <div ...>
+        {SKILLS.map((skill) => (
+          <SkillRow key={skill.id} skill={skill} />
+        ))}
+      </div>
+
+      <Card ...>
+        <h3 ...>More on GitHub</h3>
+        <p ...>Browse source, README, and examples.</p>
+        <a href={SKILLS_REPO_URL} ...>
+          <span ...>open_in_new</span> View on GitHub
+        </a>
+      </Card>
+    </div>
+  );
+}
diff --git a/src/app/api/providers/validate/route.js b/src/app/api/providers/validate/route.js
index 60e4aa68..3aa5c5bd 100644
--- a/src/app/api/providers/validate/route.js
+++ b/src/app/api/providers/validate/route.js
@@ -40,38 +40,40 @@ async function probeWebProvider(provider, apiKey) {
   return res.status !== 401 && res.status !== 403;
 }

-// Probe a tts/embedding provider using ttsConfig/embeddingConfig.
-// Returns true if API key is accepted (status !== 401 && !== 403); null to skip.
+// Probe a media provider (tts/embedding/stt/image/video) using *Config.
+// Returns true if API key is accepted; null to skip (let default handler decide).
 async function probeMediaProvider(provider, apiKey) {
   const p = AI_PROVIDERS[provider];
   if (!p) return null;

-  // Only probe providers that are media-only (not LLM dual-purpose, let LLM validate handle those)
+  const MEDIA_KINDS = new Set(["tts", "embedding", "stt", "image", "video", "music", "imageToText"]);
   const kinds = p.serviceKinds || ["llm"];
-  const isMediaOnly = kinds.every((k) => k === "tts" || k === "embedding" || k === "stt");
+  const isMediaOnly = kinds.every((k) => MEDIA_KINDS.has(k));
   if (!isMediaOnly) return null;

-  const cfg = p.ttsConfig || p.embeddingConfig;
-  if (!cfg) return null;
+  const cfg = p.ttsConfig || p.embeddingConfig || p.imageConfig || p.videoConfig || p.musicConfig;
+  // No probe config → best-effort accept (validate at usage time)
+  if (!cfg) return true;
   if (p.noAuth || cfg.authType === "none") return true;

   // Skip auth schemes that need provider-specific data
-  if (cfg.authHeader === "playht" || cfg.authHeader === "aws-sigv4") return null;
+  if (cfg.authHeader === "playht" || cfg.authHeader === "aws-sigv4") return true;

-  const headers = { "Content-Type": "application/json" };
+  const headers = { "Content-Type": "application/json", ...(cfg.extraHeaders || {}) };

-  // Apply auth based on authHeader
   switch (cfg.authHeader) {
     case "bearer": headers["Authorization"] = `Bearer ${apiKey}`; break;
+    case "key": headers["Authorization"] = `Key ${apiKey}`; break;
     case "x-api-key": headers["x-api-key"] = apiKey; break;
+    case "x-key": headers["x-key"] = apiKey; break;
     case "xi-api-key": headers["xi-api-key"] = apiKey; break;
     case "token": headers["Authorization"] = `Token ${apiKey}`; break;
     case "basic": headers["Authorization"] = `Basic ${apiKey}`; break;
     default: return null;
   }

-  // Minimal POST body — server will reject auth before validating body
+  const method = cfg.method || "POST";
   const res = await fetch(cfg.baseUrl, {
-    method: "POST",
+    method,
     headers,
-    body: JSON.stringify({ input: "ping", text: "ping", model: cfg.models?.[0]?.id || "test" }),
+    body: method === "GET" ? undefined : JSON.stringify({ input: "ping", text: "ping", prompt: "ping", model: cfg.models?.[0]?.id || "test" }),
     signal: AbortSignal.timeout(8000),
   });
   return res.status !== 401 && res.status !== 403;
diff --git a/src/app/api/v1/models/[kind]/route.js b/src/app/api/v1/models/[kind]/route.js
new file mode 100644
index 00000000..b0f715ba
--- /dev/null
+++ b/src/app/api/v1/models/[kind]/route.js
@@ -0,0 +1,55 @@
+import { buildModelsList } from "../route.js";
+
+// URL slug → service kind(s). `web` covers both webSearch and webFetch.
+const KIND_SLUG_MAP = {
+  "image": ["image"],
+  "tts": ["tts"],
+  "stt": ["stt"],
+  "embedding": ["embedding"],
+  "image-to-text": ["imageToText"],
+  "web": ["webSearch", "webFetch"],
+};
+
+export async function OPTIONS() {
+  return new Response(null, {
+    headers: {
+      "Access-Control-Allow-Origin": "*",
+      "Access-Control-Allow-Methods": "GET, OPTIONS",
+      "Access-Control-Allow-Headers": "*",
+    },
+  });
+}
+
+/**
+ * GET /v1/models/{kind} - OpenAI-compatible models list filtered by capability.
+ * Supported kinds: image, tts, stt, embedding, image-to-text, web.
+ */
+export async function GET(_request, { params }) {
+  try {
+    const { kind } = await params;
+    const kindFilter = KIND_SLUG_MAP[kind];
+
+    if (!kindFilter) {
+      return Response.json(
+        {
+          error: {
+            message: `Unknown model kind: ${kind}. Supported: ${Object.keys(KIND_SLUG_MAP).join(", ")}`,
+            type: "invalid_request_error",
+          },
+        },
+        { status: 404, headers: { "Access-Control-Allow-Origin": "*" } }
+      );
+    }
+
+    const data = await buildModelsList(kindFilter);
+    return Response.json({ object: "list", data }, {
+      headers: { "Access-Control-Allow-Origin": "*" },
+    });
+  } catch (error) {
+    console.log("Error fetching models by kind:", error);
+    return Response.json(
+      { error: { message: error.message, type: "server_error" } },
+      { status: 500 }
+    );
+  }
+}
diff --git a/src/app/api/v1/models/route.js b/src/app/api/v1/models/route.js
index 7003c602..a08c8976 100644
--- a/src/app/api/v1/models/route.js
+++ b/src/app/api/v1/models/route.js
@@ -1,5 +1,10 @@
 import { PROVIDER_MODELS, PROVIDER_ID_TO_ALIAS } from "@/shared/constants/models";
-import { getProviderAlias, isAnthropicCompatibleProvider, isOpenAICompatibleProvider } from "@/shared/constants/providers";
+import {
+  AI_PROVIDERS,
+  getProviderAlias,
+  isAnthropicCompatibleProvider,
+  isOpenAICompatibleProvider,
+} from "@/shared/constants/providers";
 import { getProviderConnections, getCombos, getCustomModels, getModelAliases } from "@/lib/localDb";

 const parseOpenAIStyleModels = (data) => {
@@ -10,6 +15,34 @@ const parseOpenAIStyleModels = (data) => {
 // Matches provider IDs that are upstream/cross-instance connections (contain a UUID suffix)
 const UPSTREAM_CONNECTION_RE = /[-_][0-9a-f]{8,}$/i;

+// LLM kind sentinel — combos/models with no explicit kind default to LLM
+const LLM_KIND = "llm";
+
+// Map per-model `type` field (in PROVIDER_MODELS) to service kind.
+// Models without `type` are treated as LLM.
+const MODEL_TYPE_TO_KIND = {
+  image: "image",
+  tts: "tts",
+  embedding: "embedding",
+  stt: "stt",
+  imageToText: "imageToText",
+};
+
+function modelKind(model) {
+  if (!model?.type) return LLM_KIND;
+  return MODEL_TYPE_TO_KIND[model.type] || LLM_KIND;
+}
+
+// For dynamic/unknown model IDs (compatible providers, alias map, custom models)
+// fall back to provider-level kind matching when per-model type is unavailable.
+function inferKindFromUnknownModelId(modelId) {
+  const lower = String(modelId).toLowerCase();
+  if (/embed/.test(lower)) return "embedding";
+  if (/tts|speech|audio|voice/.test(lower)) return "tts";
+  if (/image|imagen|dall-?e|flux|sdxl|sd-|stable-diffusion/.test(lower)) return "image";
+  return LLM_KIND;
+}
+
 async function fetchCompatibleModelIds(connection) {
   if (!connection?.apiKey) return [];
@@ -67,6 +100,271 @@ async function fetchCompatibleModelIds(connection) {
 }

+// Provider matches kindFilter when its serviceKinds intersect the requested kinds.
+// LLM is the default kind for providers missing serviceKinds.
+function providerMatchesKinds(providerId, kindFilter) {
+  const provider = AI_PROVIDERS[providerId];
+  const kinds = Array.isArray(provider?.serviceKinds) && provider.serviceKinds.length > 0
+    ? provider.serviceKinds
+    : [LLM_KIND];
+  return kindFilter.some((k) => kinds.includes(k));
+}
+
+// Combo matches kindFilter when its `kind` field is in the list.
+// Combos with no kind are treated as LLM.
+function comboMatchesKinds(combo, kindFilter) {
+  const kind = combo?.kind || LLM_KIND;
+  return kindFilter.includes(kind);
+}
+
+/**
+ * Build OpenAI-format models list filtered by service kinds.
+ * @param {string[]} kindFilter - List of service kinds to include (e.g. ["llm"], ["webSearch","webFetch"]).
+ */
+export async function buildModelsList(kindFilter) {
+  let connections = [];
+  try {
+    connections = await getProviderConnections();
+    connections = connections.filter(c => c.isActive !== false);
+  } catch (e) {
+    console.log("Could not fetch providers, returning all models");
+  }
+
+  let combos = [];
+  try {
+    combos = await getCombos();
+  } catch (e) {
+    console.log("Could not fetch combos");
+  }
+
+  let customModels = [];
+  try {
+    customModels = await getCustomModels();
+  } catch (e) {
+    console.log("Could not fetch custom models");
+  }
+
+  let modelAliases = {};
+  try {
+    modelAliases = await getModelAliases();
+  } catch (e) {
+    console.log("Could not fetch model aliases");
+  }
+
+  const activeConnectionByProvider = new Map();
+  for (const conn of connections) {
+    if (!activeConnectionByProvider.has(conn.provider)) {
+      activeConnectionByProvider.set(conn.provider, conn);
+    }
+  }
+
+  const models = [];
+  const timestamp = Math.floor(Date.now() / 1000);
+
+  // Combos first (filtered by kind). Web combos expose `kind` so AI knows search vs fetch.
+  for (const combo of combos) {
+    if (!comboMatchesKinds(combo, kindFilter)) continue;
+    const entry = {
+      id: combo.name,
+      object: "model",
+      created: timestamp,
+      owned_by: "combo",
+    };
+    if (combo.kind === "webSearch" || combo.kind === "webFetch") {
+      entry.kind = combo.kind;
+    }
+    models.push(entry);
+  }
+
+  if (connections.length === 0) {
+    // DB unavailable -> return static models, filtered by per-model kind
+    const aliasToProviderId = Object.fromEntries(
+      Object.entries(PROVIDER_ID_TO_ALIAS).map(([id, alias]) => [alias, id])
+    );
+    for (const [alias, providerModels] of Object.entries(PROVIDER_MODELS)) {
+      const providerId = aliasToProviderId[alias] || alias;
+      if (!providerMatchesKinds(providerId, kindFilter)) continue;
+      for (const model of providerModels) {
+        if (!kindFilter.includes(modelKind(model))) continue;
+        models.push({
+          id: `${alias}/${model.id}`,
+          object: "model",
+          created: timestamp,
+          owned_by: alias,
+        });
+      }
+    }
+
+    for (const customModel of customModels) {
+      if (!customModel?.id || (customModel.type && customModel.type !== "llm")) continue;
+      // Custom models without active connection are LLM-only by current schema
+      if (!kindFilter.includes(LLM_KIND)) continue;
+      const providerAlias = customModel.providerAlias;
+      if (!providerAlias) continue;
+
+      const modelId = String(customModel.id).trim();
+      if (!modelId) continue;
+
+      models.push({
+        id: `${providerAlias}/${modelId}`,
+        object: "model",
+        created: timestamp,
+        owned_by: providerAlias,
+      });
+    }
+  } else {
+    for (const [providerId, conn] of activeConnectionByProvider.entries()) {
+      if (!providerMatchesKinds(providerId, kindFilter)) continue;
+
+      const staticAlias = PROVIDER_ID_TO_ALIAS[providerId] || providerId;
+      const outputAlias = (
+        conn?.providerSpecificData?.prefix
+        || getProviderAlias(providerId)
+        || staticAlias
+      ).trim();
+      const providerModels = PROVIDER_MODELS[staticAlias] || [];
+      const enabledModels = conn?.providerSpecificData?.enabledModels;
+      const hasExplicitEnabledModels =
+        Array.isArray(enabledModels) && enabledModels.length > 0;
+      const isCompatibleProvider =
+        isOpenAICompatibleProvider(providerId) || isAnthropicCompatibleProvider(providerId);
+
+      // Build kind lookup for static models so we can filter even when only IDs are exposed
+      const staticModelKindById = new Map(
+        providerModels.map((m) => [m.id, modelKind(m)])
+      );
+
+      let rawModelIds = hasExplicitEnabledModels
+        ?
          Array.from(
+            new Set(
+              enabledModels.filter(
+                (modelId) => typeof modelId === "string" && modelId.trim() !== "",
+              ),
+            ),
+          )
+        : providerModels.map((model) => model.id);
+
+      if (isCompatibleProvider && rawModelIds.length === 0 && !UPSTREAM_CONNECTION_RE.test(providerId)) {
+        rawModelIds = await fetchCompatibleModelIds(conn);
+      }
+
+      const modelIds = rawModelIds
+        .map((modelId) => {
+          if (modelId.startsWith(`${outputAlias}/`)) {
+            return modelId.slice(outputAlias.length + 1);
+          }
+          if (modelId.startsWith(`${staticAlias}/`)) {
+            return modelId.slice(staticAlias.length + 1);
+          }
+          if (modelId.startsWith(`${providerId}/`)) {
+            return modelId.slice(providerId.length + 1);
+          }
+          return modelId;
+        })
+        .filter((modelId) => typeof modelId === "string" && modelId.trim() !== "");
+
+      const customModelIds = customModels
+        .filter((m) => {
+          if (!m?.id || (m.type && m.type !== "llm")) return false;
+          const alias = m.providerAlias;
+          return alias === staticAlias || alias === outputAlias || alias === providerId;
+        })
+        .map((m) => String(m.id).trim())
+        .filter((modelId) => modelId !== "");
+
+      const aliasModelIds = Object.values(modelAliases || {})
+        .filter((fullModel) => {
+          if (typeof fullModel !== "string" || !fullModel.includes("/")) return false;
+          return (
+            fullModel.startsWith(`${outputAlias}/`) ||
+            fullModel.startsWith(`${staticAlias}/`) ||
+            fullModel.startsWith(`${providerId}/`)
+          );
+        })
+        .map((fullModel) => {
+          if (fullModel.startsWith(`${outputAlias}/`)) {
+            return fullModel.slice(outputAlias.length + 1);
+          }
+          if (fullModel.startsWith(`${staticAlias}/`)) {
+            return fullModel.slice(staticAlias.length + 1);
+          }
+          if (fullModel.startsWith(`${providerId}/`)) {
+            return fullModel.slice(providerId.length + 1);
+          }
+          return fullModel;
+        })
+        .filter((modelId) => typeof modelId === "string" && modelId.trim() !== "");
+
+      const mergedModelIds = Array.from(new Set([...modelIds, ...customModelIds, ...aliasModelIds]));
+
+      for (const modelId of mergedModelIds) {
+        // Resolve kind: prefer static metadata, otherwise infer from ID heuristics
+        const kind = staticModelKindById.get(modelId) || inferKindFromUnknownModelId(modelId);
+        if (!kindFilter.includes(kind)) continue;
+
+        models.push({
+          id: `${outputAlias}/${modelId}`,
+          object: "model",
+          created: timestamp,
+          owned_by: outputAlias,
+        });
+      }
+
+      // Merge sub-config models (TTS / embedding) that live on AI_PROVIDERS, not PROVIDER_MODELS
+      const providerInfo = AI_PROVIDERS[providerId];
+      const subConfigModels = [];
+      if (kindFilter.includes("tts") && Array.isArray(providerInfo?.ttsConfig?.models)) {
+        for (const m of providerInfo.ttsConfig.models) {
+          if (m?.id) subConfigModels.push(m.id);
+        }
+      }
+      if (kindFilter.includes("embedding") && Array.isArray(providerInfo?.embeddingConfig?.models)) {
+        for (const m of providerInfo.embeddingConfig.models) {
+          if (m?.id) subConfigModels.push(m.id);
+        }
+      }
+      for (const subId of subConfigModels) {
+        models.push({
+          id: `${outputAlias}/${subId}`,
+          object: "model",
+          created: timestamp,
+          owned_by: outputAlias,
+        });
+      }
+
+      // Web search/fetch — provider IS the model, expose as {alias}/search and/or {alias}/fetch with explicit kind
+      if (kindFilter.includes("webSearch") && providerInfo?.searchConfig) {
+        models.push({
+          id: `${outputAlias}/search`,
+          object: "model",
+          kind: "webSearch",
+          created: timestamp,
+          owned_by: outputAlias,
+        });
+      }
+      if (kindFilter.includes("webFetch") && providerInfo?.fetchConfig) {
+        models.push({
+          id: `${outputAlias}/fetch`,
+          object: "model",
+          kind: "webFetch",
+          created: timestamp,
+          owned_by: outputAlias,
+        });
+      }
+    }
+  }
+
+  const dedupedModels = [];
+  const seenModelIds = new Set();
+  for (const model of models) {
+    if (!model?.id || seenModelIds.has(model.id)) continue;
+    seenModelIds.add(model.id);
+    dedupedModels.push(model);
+  }
+
+  return dedupedModels;
+}
+
 /**
  * Handle CORS preflight
  */
@@ -81,203 +379,14 @@ export async function OPTIONS() {
 }

 /**
- * GET /v1/models - OpenAI compatible models list
- * Returns models from all active providers and combos in OpenAI format
+ * GET /v1/models - OpenAI compatible models list (LLM/chat models only by default).
+ * For other capabilities use /v1/models/{kind} (image, tts, stt, embedding, image-to-text, web).
  */
 export async function GET() {
   try {
-    // Get active provider connections
-    let connections = [];
-    try {
-      connections = await getProviderConnections();
-      // Filter to only active connections
-      connections = connections.filter(c => c.isActive !== false);
-    } catch (e) {
-      // If database not available, return all models
-      console.log("Could not fetch providers, returning all models");
-    }
-
-    // Get combos
-    let combos = [];
-    try {
-      combos = await getCombos();
-    } catch (e) {
-      console.log("Could not fetch combos");
-    }
-
-    let customModels = [];
-    try {
-      customModels = await getCustomModels();
-    } catch (e) {
-      console.log("Could not fetch custom models");
-    }
-
-    let modelAliases = {};
-    try {
-      modelAliases = await getModelAliases();
-    } catch (e) {
-      console.log("Could not fetch model aliases");
-    }
-
-    // Build first active connection per provider (connections already sorted by priority)
-    const activeConnectionByProvider = new Map();
-    for (const conn of connections) {
-      if (!activeConnectionByProvider.has(conn.provider)) {
-        activeConnectionByProvider.set(conn.provider, conn);
-      }
-    }
-
-    // Collect models from active providers (or all if none active)
-    const models = [];
-    const timestamp = Math.floor(Date.now() / 1000);
-
-    // Add combos first (they appear at the top)
-    for (const combo of combos) {
-      models.push({
-        id: combo.name,
-        object: "model",
-        created: timestamp,
-        owned_by: "combo",
-      });
-    }
-
-    // Add provider models
-    if (connections.length === 0) {
-      // DB unavailable or no active providers -> return all static models
-      for (const [alias, providerModels] of Object.entries(PROVIDER_MODELS)) {
-        for (const model of providerModels) {
-          models.push({
-            id: `${alias}/${model.id}`,
-            object: "model",
-            created: timestamp,
-            owned_by: alias,
-          });
-        }
-      }
-
-      // Also include custom LLM models when no active connections are available.
-      for (const customModel of customModels) {
-        if (!customModel?.id || (customModel.type && customModel.type !== "llm")) continue;
-        const providerAlias = customModel.providerAlias;
-        if (!providerAlias) continue;
-
-        const modelId = String(customModel.id).trim();
-        if (!modelId) continue;
-
-        models.push({
-          id: `${providerAlias}/${modelId}`,
-          object: "model",
-          created: timestamp,
-          owned_by: providerAlias,
-        });
-      }
-    } else {
-      for (const [providerId, conn] of activeConnectionByProvider.entries()) {
-        const staticAlias = PROVIDER_ID_TO_ALIAS[providerId] || providerId;
-        const outputAlias = (
-          conn?.providerSpecificData?.prefix
-          || getProviderAlias(providerId)
-          || staticAlias
-        ).trim();
-        const providerModels = PROVIDER_MODELS[staticAlias] || [];
-        const enabledModels = conn?.providerSpecificData?.enabledModels;
-        const hasExplicitEnabledModels =
-          Array.isArray(enabledModels) && enabledModels.length > 0;
-        const isCompatibleProvider =
-          isOpenAICompatibleProvider(providerId) || isAnthropicCompatibleProvider(providerId);
-
-        // Default: if no explicit selection, all static models are active.
-        // For compatible providers with no explicit selection, fetch remote /models dynamically.
-        // If explicit selection exists, expose exactly those model IDs (including non-static IDs).
-        let rawModelIds = hasExplicitEnabledModels
-          ? Array.from(
-              new Set(
-                enabledModels.filter(
-                  (modelId) => typeof modelId === "string" && modelId.trim() !== "",
-                ),
-              ),
-            )
-          : providerModels.map((model) => model.id);
-
-        if (isCompatibleProvider && rawModelIds.length === 0 && !UPSTREAM_CONNECTION_RE.test(providerId)) {
-          rawModelIds = await fetchCompatibleModelIds(conn);
-        }
-
-        const modelIds = rawModelIds
-          .map((modelId) => {
-            if (modelId.startsWith(`${outputAlias}/`)) {
-              return modelId.slice(outputAlias.length + 1);
-            }
-            if (modelId.startsWith(`${staticAlias}/`)) {
-              return modelId.slice(staticAlias.length + 1);
-            }
-            if (modelId.startsWith(`${providerId}/`)) {
-              return modelId.slice(providerId.length + 1);
-            }
-            return modelId;
-          })
-          .filter((modelId) => typeof modelId === "string" && modelId.trim() !== "");
-
-        const customModelIds = customModels
-          .filter((m) => {
-            if (!m?.id || (m.type && m.type !== "llm")) return false;
-            const alias = m.providerAlias;
-            return alias === staticAlias || alias === outputAlias || alias === providerId;
-          })
-          .map((m) => String(m.id).trim())
-          .filter((modelId) => modelId !== "");
-
-        const aliasModelIds = Object.values(modelAliases || {})
-          .filter((fullModel) => {
-            if (typeof fullModel !== "string" || !fullModel.includes("/")) return false;
-            return (
-              fullModel.startsWith(`${outputAlias}/`) ||
-              fullModel.startsWith(`${staticAlias}/`) ||
-              fullModel.startsWith(`${providerId}/`)
-            );
-          })
-          .map((fullModel) => {
-            if (fullModel.startsWith(`${outputAlias}/`)) {
-              return fullModel.slice(outputAlias.length + 1);
-            }
-            if (fullModel.startsWith(`${staticAlias}/`)) {
-              return fullModel.slice(staticAlias.length + 1);
-            }
-            if (fullModel.startsWith(`${providerId}/`)) {
-              return fullModel.slice(providerId.length + 1);
-            }
-            return fullModel;
-          })
-          .filter((modelId) => typeof modelId === "string" && modelId.trim() !== "");
-
-        const mergedModelIds = Array.from(new Set([...modelIds, ...customModelIds, ...aliasModelIds]));
-
-        for (const modelId of mergedModelIds) {
-          models.push({
-            id: `${outputAlias}/${modelId}`,
-            object: "model",
-            created: timestamp,
-            owned_by: outputAlias,
-          });
-        }
-      }
-    }
-
-    const dedupedModels = [];
-    const seenModelIds = new Set();
-    for (const model of models) {
-      if (!model?.id || seenModelIds.has(model.id)) continue;
-      seenModelIds.add(model.id);
-      dedupedModels.push(model);
-    }
-
-    return Response.json({
-      object: "list",
-      data: dedupedModels,
-    }, {
-      headers: {
-        "Access-Control-Allow-Origin": "*",
-      },
+    const data = await buildModelsList([LLM_KIND]);
+    return Response.json({ object: "list", data }, {
+      headers: { "Access-Control-Allow-Origin": "*" },
     });
   } catch (error) {
     console.log("Error fetching models:", error);
diff --git a/src/shared/components/Header.js b/src/shared/components/Header.js
index 6e886300..1043257b 100644
--- a/src/shared/components/Header.js
+++ b/src/shared/components/Header.js
@@ -123,6 +123,13 @@ const getPageInfo = (pathname) => {
       icon: "lan",
       breadcrumbs: [],
     };
+  if (pathname.includes("/skills"))
+    return {
+      title: "Agent Skills",
+      description: "Copy a link and paste to your AI to use 9Router — no install needed",
+      icon: "extension",
+      breadcrumbs: [],
+    };
   if (pathname.includes("/endpoint"))
     return {
       title: "Endpoint",
diff --git a/src/shared/components/Sidebar.js b/src/shared/components/Sidebar.js
index 72774b90..55481b03 100644
--- a/src/shared/components/Sidebar.js
+++ b/src/shared/components/Sidebar.js
@@ -34,6 +34,7 @@ const debugItems = [

 const systemItems = [
   { href: "/dashboard/proxy-pools", label: "Proxy Pools", icon: "lan" },
+  { href: "/dashboard/skills", label: "Skills", icon: "extension" },
 ];

 export default function Sidebar({ onClose }) {
@@ -171,14 +172,14 @@ export default function Sidebar({ onClose }) {
{/* Navigation */} -