mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
Feat : Skills
This commit is contained in:
@@ -556,6 +556,40 @@ export const PROVIDER_MODELS = {
|
||||
{ id: "black-forest-labs/FLUX.1-schnell", name: "FLUX.1 Schnell", type: "image", params: [] },
|
||||
{ id: "stabilityai/stable-diffusion-xl-base-1.0", name: "SDXL Base 1.0", type: "image", params: [] },
|
||||
],
|
||||
"fal-ai": [
|
||||
{ id: "fal-ai/flux/schnell", name: "FLUX Schnell", type: "image", params: ["n", "size"] },
|
||||
{ id: "fal-ai/flux/dev", name: "FLUX Dev", type: "image", params: ["n", "size"] },
|
||||
{ id: "fal-ai/flux-pro/v1.1", name: "FLUX Pro v1.1", type: "image", params: ["n", "size"] },
|
||||
{ id: "fal-ai/flux-pro/v1.1-ultra", name: "FLUX Pro v1.1 Ultra", type: "image", params: ["n", "size"] },
|
||||
{ id: "fal-ai/recraft-v3", name: "Recraft V3", type: "image", params: ["n", "size", "style"] },
|
||||
{ id: "fal-ai/ideogram/v2", name: "Ideogram V2", type: "image", params: ["n", "size", "style"] },
|
||||
{ id: "fal-ai/stable-diffusion-v35-large", name: "SD 3.5 Large", type: "image", params: ["n", "size"] },
|
||||
],
|
||||
"stability-ai": [
|
||||
{ id: "stable-image-ultra", name: "Stable Image Ultra", type: "image", params: ["size"] },
|
||||
{ id: "stable-image-core", name: "Stable Image Core", type: "image", params: ["size", "style"] },
|
||||
{ id: "sd3.5-large", name: "Stable Diffusion 3.5 Large", type: "image", params: ["size"] },
|
||||
{ id: "sd3.5-large-turbo", name: "Stable Diffusion 3.5 Large Turbo", type: "image", params: ["size"] },
|
||||
{ id: "sd3.5-medium", name: "Stable Diffusion 3.5 Medium", type: "image", params: ["size"] },
|
||||
],
|
||||
"black-forest-labs": [
|
||||
{ id: "flux-pro-1.1", name: "FLUX Pro 1.1", type: "image", params: ["n", "size"] },
|
||||
{ id: "flux-pro-1.1-ultra", name: "FLUX Pro 1.1 Ultra", type: "image", params: ["size"] },
|
||||
{ id: "flux-pro", name: "FLUX Pro", type: "image", params: ["n", "size"] },
|
||||
{ id: "flux-dev", name: "FLUX Dev", type: "image", params: ["n", "size"] },
|
||||
{ id: "flux-kontext-pro", name: "FLUX Kontext Pro (Edit)", type: "image", params: ["size"], capabilities: ["edit"] },
|
||||
{ id: "flux-kontext-max", name: "FLUX Kontext Max (Edit)", type: "image", params: ["size"], capabilities: ["edit"] },
|
||||
],
|
||||
recraft: [
|
||||
{ id: "recraftv3", name: "Recraft V3", type: "image", params: ["n", "size", "style"] },
|
||||
{ id: "recraftv2", name: "Recraft V2", type: "image", params: ["n", "size", "style"] },
|
||||
],
|
||||
runwayml: [
|
||||
{ id: "gen4_image", name: "Gen-4 Image", type: "image", params: ["size"] },
|
||||
{ id: "gen4_image_turbo", name: "Gen-4 Image Turbo", type: "image", params: ["size"] },
|
||||
{ id: "gen4_turbo", name: "Gen-4 Turbo", type: "video", params: [] },
|
||||
{ id: "gen3a_turbo", name: "Gen-3 Alpha Turbo", type: "video", params: [] },
|
||||
],
|
||||
};
|
||||
|
||||
// Helper functions
|
||||
|
||||
4
open-sse/handlers/embeddingProviders/_base.js
Normal file
4
open-sse/handlers/embeddingProviders/_base.js
Normal file
@@ -0,0 +1,4 @@
|
||||
// Shared embedding helpers

/**
 * Build an Authorization header from provider credentials.
 * Prefers `apiKey`; falls back to `accessToken`.
 * NOTE(review): if both are missing this yields "Bearer undefined" —
 * confirm callers always guarantee one of the two is set.
 * @param {{apiKey?: string, accessToken?: string}} creds
 * @returns {{Authorization: string}}
 */
export function bearerAuth(creds) {
  const token = creds.apiKey || creds.accessToken;
  return { Authorization: `Bearer ${token}` };
}
42
open-sse/handlers/embeddingProviders/gemini.js
Normal file
42
open-sse/handlers/embeddingProviders/gemini.js
Normal file
@@ -0,0 +1,42 @@
|
||||
// Google Gemini embeddings — embedContent / batchEmbedContents
const BASE = "https://generativelanguage.googleapis.com/v1beta";

/**
 * Ensure the model id carries the "models/" prefix the Gemini API expects.
 * @param {string} model
 * @returns {string}
 */
function modelPath(model) {
  if (model.startsWith("models/")) return model;
  return `models/${model}`;
}

// Wrap one text input in the Gemini content envelope.
const toContent = (text) => ({ parts: [{ text: String(text) }] });

export default {
  // Array input selects batchEmbedContents; a single string uses embedContent.
  // The API key travels as a query parameter, not a header.
  buildUrl: (model, creds, { input } = {}) => {
    const apiKey = creds.apiKey || creds.accessToken;
    const op = Array.isArray(input) ? "batchEmbedContents" : "embedContent";
    return `${BASE}/${modelPath(model)}:${op}?key=${encodeURIComponent(apiKey)}`;
  },

  // No Authorization header — the key is embedded in the URL.
  buildHeaders: () => ({ "Content-Type": "application/json" }),

  // Gemini body shape differs from OpenAI per the API spec:
  //   single → { model, content }
  //   batch  → { requests: [{ model, content }, ...] }
  buildBody: (model, { input }) => {
    const m = modelPath(model);
    if (Array.isArray(input)) {
      return { requests: input.map((text) => ({ model: m, content: toContent(text) })) };
    }
    return { model: m, content: toContent(input) };
  },

  // Convert Gemini embedding payloads into the OpenAI list format.
  // This endpoint reports no token usage, so usage is zeroed.
  normalize: (responseBody, model) => {
    if (responseBody.object === "list" && Array.isArray(responseBody.data)) return responseBody;

    let items = [];
    if (Array.isArray(responseBody.embeddings)) {
      // batchEmbedContents → { embeddings: [{ values: [...] }, ...] }
      items = responseBody.embeddings.map((emb, idx) => ({
        object: "embedding",
        index: idx,
        embedding: emb.values || [],
      }));
    } else if (responseBody.embedding?.values) {
      // embedContent → { embedding: { values: [...] } }
      items = [{ object: "embedding", index: 0, embedding: responseBody.embedding.values }];
    }

    return {
      object: "list",
      data: items,
      model,
      usage: { prompt_tokens: 0, total_tokens: 0 },
    };
  },
};
|
||||
23
open-sse/handlers/embeddingProviders/index.js
Normal file
23
open-sse/handlers/embeddingProviders/index.js
Normal file
@@ -0,0 +1,23 @@
|
||||
// Embeddings provider adapter registry
import createOpenAIEmbeddingAdapter from "./openai.js";
import gemini from "./gemini.js";
import openaiCompatNode from "./openaiCompatNode.js";

// Providers that speak the standard OpenAI /v1/embeddings wire format.
const OPENAI_COMPAT_PROVIDERS = [
  "openai", "openrouter", "mistral", "voyage-ai", "fireworks",
  "together", "nebius", "github", "nvidia", "jina-ai",
];

// provider id → adapter; both Gemini aliases share one adapter instance.
const ADAPTERS = {};
for (const id of OPENAI_COMPAT_PROVIDERS) {
  ADAPTERS[id] = createOpenAIEmbeddingAdapter(id);
}
ADAPTERS.gemini = gemini;
ADAPTERS.google_ai_studio = gemini;

/**
 * Resolve the embeddings adapter for a provider id.
 * Dynamic "openai-compatible-*" / "custom-embedding-*" nodes fall back to
 * the baseUrl-driven OpenAI-compatible adapter; unknown providers get null.
 * @param {string} provider
 * @returns {object|null}
 */
export function getEmbeddingAdapter(provider) {
  const known = ADAPTERS[provider];
  if (known) return known;
  const isCustomNode =
    provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-");
  return isCustomNode ? openaiCompatNode : null;
}
|
||||
39
open-sse/handlers/embeddingProviders/openai.js
Normal file
39
open-sse/handlers/embeddingProviders/openai.js
Normal file
@@ -0,0 +1,39 @@
|
||||
// OpenAI-compatible embeddings adapter (most providers)
import { bearerAuth } from "./_base.js";

// provider id → fixed embeddings endpoint
const ENDPOINTS = {
  openai: "https://api.openai.com/v1/embeddings",
  openrouter: "https://openrouter.ai/api/v1/embeddings",
  mistral: "https://api.mistral.ai/v1/embeddings",
  "voyage-ai": "https://api.voyageai.com/v1/embeddings",
  fireworks: "https://api.fireworks.ai/inference/v1/embeddings",
  together: "https://api.together.xyz/v1/embeddings",
  nebius: "https://api.tokenfactory.nebius.com/v1/embeddings",
  github: "https://models.github.ai/inference/embeddings",
  nvidia: "https://integrate.api.nvidia.com/v1/embeddings",
  "jina-ai": "https://api.jina.ai/v1/embeddings",
};

/**
 * Create an embeddings adapter for a provider that speaks the OpenAI wire format.
 * @param {string} providerId - key into ENDPOINTS
 * @returns {object} adapter with buildUrl / buildHeaders / buildBody / normalize
 */
export default function createOpenAIEmbeddingAdapter(providerId) {
  const isOpenRouter = providerId === "openrouter";
  return {
    buildUrl: () => ENDPOINTS[providerId],

    // Bearer auth everywhere; OpenRouter additionally wants attribution headers.
    buildHeaders: (creds) => {
      const headers = { "Content-Type": "application/json", ...bearerAuth(creds) };
      if (isOpenRouter) {
        headers["HTTP-Referer"] = "https://endpoint-proxy.local";
        headers["X-Title"] = "Endpoint Proxy";
      }
      return headers;
    },

    // Standard OpenAI body; dimensions is forwarded only as a positive number.
    buildBody: (model, { input, encoding_format, dimensions }) => {
      const body = { model, input };
      if (encoding_format) body.encoding_format = encoding_format;
      const dim = Number(dimensions);
      if (dimensions != null && dimensions !== "" && Number.isFinite(dim) && dim > 0) {
        body.dimensions = dim;
      }
      return body;
    },

    // Upstream already answers in OpenAI format — pass through unchanged.
    normalize: (responseBody) => responseBody,
  };
}
|
||||
13
open-sse/handlers/embeddingProviders/openaiCompatNode.js
Normal file
13
open-sse/handlers/embeddingProviders/openaiCompatNode.js
Normal file
@@ -0,0 +1,13 @@
|
||||
// Custom node providers (openai-compatible-* / custom-embedding-*) — baseUrl from credentials
import createOpenAIEmbeddingAdapter from "./openai.js";

// Reuse the stock OpenAI adapter for headers / body / normalize.
const baseAdapter = createOpenAIEmbeddingAdapter("openai");

export default {
  ...baseAdapter,
  /**
   * Derive the endpoint from the node's configured baseUrl.
   * Strips a trailing slash and an accidental "/embeddings" so the
   * suffix is never appended twice.
   */
  buildUrl: (_model, creds) => {
    const configured = creds?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
    const trimmed = configured.replace(/\/$/, "").replace(/\/embeddings$/, "");
    return `${trimmed}/embeddings`;
  },
};
|
||||
@@ -1,196 +1,13 @@
|
||||
import { getModelTargetFormat, PROVIDER_ID_TO_ALIAS } from "../config/providerModels.js";
|
||||
import { createErrorResult, parseUpstreamError, formatProviderError } from "../utils/error.js";
|
||||
import { HTTP_STATUS } from "../config/runtimeConfig.js";
|
||||
import { getExecutor } from "../executors/index.js";
|
||||
import { refreshWithRetry } from "../services/tokenRefresh.js";
|
||||
|
||||
// Google AI (Gemini) provider aliases / identifiers
|
||||
const GEMINI_PROVIDERS = new Set(["gemini", "google_ai_studio"]);
|
||||
|
||||
// Static map: provider id → embeddings endpoint (OpenAI-compatible body format)
|
||||
const EMBEDDING_URLS = {
|
||||
openai: "https://api.openai.com/v1/embeddings",
|
||||
openrouter: "https://openrouter.ai/api/v1/embeddings",
|
||||
mistral: "https://api.mistral.ai/v1/embeddings",
|
||||
"voyage-ai": "https://api.voyageai.com/v1/embeddings",
|
||||
fireworks: "https://api.fireworks.ai/inference/v1/embeddings",
|
||||
together: "https://api.together.xyz/v1/embeddings",
|
||||
nebius: "https://api.tokenfactory.nebius.com/v1/embeddings",
|
||||
github: "https://models.github.ai/inference/embeddings",
|
||||
nvidia: "https://integrate.api.nvidia.com/v1/embeddings",
|
||||
};
|
||||
import { getEmbeddingAdapter } from "./embeddingProviders/index.js";
|
||||
|
||||
/**
|
||||
* Check whether a provider targets the Google AI (Gemini) embeddings API.
|
||||
* @param {string} provider
|
||||
*/
|
||||
function isGeminiProvider(provider) {
|
||||
return GEMINI_PROVIDERS.has(provider);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the embeddings request body for the target provider.
|
||||
* Core embeddings handler — orchestrator only. Provider-specific URL/headers/body/normalize
|
||||
* live in `./embeddingProviders/{id}.js`.
|
||||
*
|
||||
* - OpenAI / openai-compatible / openrouter: standard { model, input } format.
|
||||
* - Google AI (Gemini): different format per API spec.
|
||||
* - Single input → embedContent body: { model, content: { parts: [{ text }] } }
|
||||
* - Batch input → batchEmbedContents body: { requests: [{ model, content: { parts: [{ text }] } }] }
|
||||
*/
|
||||
function buildEmbeddingsBody(provider, model, input, encodingFormat, dimensions) {
|
||||
if (isGeminiProvider(provider)) {
|
||||
// Normalize model name: Gemini API expects "models/<model>" prefix
|
||||
const geminiModel = model.startsWith("models/") ? model : `models/${model}`;
|
||||
|
||||
if (Array.isArray(input)) {
|
||||
// Batch request
|
||||
return {
|
||||
requests: input.map((text) => ({
|
||||
model: geminiModel,
|
||||
content: { parts: [{ text: String(text) }] }
|
||||
}))
|
||||
};
|
||||
} else {
|
||||
// Single request
|
||||
return {
|
||||
model: geminiModel,
|
||||
content: { parts: [{ text: String(input) }] }
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Default: OpenAI format
|
||||
const body = { model, input };
|
||||
if (encodingFormat) {
|
||||
body.encoding_format = encodingFormat;
|
||||
}
|
||||
if (dimensions != null && dimensions !== "") {
|
||||
const dim = Number(dimensions);
|
||||
if (Number.isFinite(dim) && dim > 0) body.dimensions = dim;
|
||||
}
|
||||
return body;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the URL for the embeddings endpoint based on the provider.
|
||||
* @param {string} provider
|
||||
* @param {string} model
|
||||
* @param {object} credentials
|
||||
* @param {string|string[]} input - used to select single vs batch endpoint for Gemini
|
||||
*/
|
||||
function buildEmbeddingsUrl(provider, model, credentials, input) {
|
||||
if (isGeminiProvider(provider)) {
|
||||
const apiKey = credentials.apiKey || credentials.accessToken;
|
||||
// Normalize model name for URL path
|
||||
const modelPath = model.startsWith("models/") ? model : `models/${model}`;
|
||||
|
||||
if (Array.isArray(input)) {
|
||||
// batchEmbedContents for array input (keeps response format consistent even for length=1)
|
||||
return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:batchEmbedContents?key=${encodeURIComponent(apiKey)}`;
|
||||
}
|
||||
return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:embedContent?key=${encodeURIComponent(apiKey)}`;
|
||||
}
|
||||
|
||||
if (EMBEDDING_URLS[provider]) return EMBEDDING_URLS[provider];
|
||||
|
||||
// openai-compatible & custom-embedding providers: use their baseUrl + /embeddings
|
||||
if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) {
|
||||
const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
|
||||
// Defensive: strip trailing slash and accidental /embeddings to avoid double-append
|
||||
const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, "");
|
||||
return `${baseUrl}/embeddings`;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build headers for the embeddings request.
|
||||
*/
|
||||
function buildEmbeddingsHeaders(provider, credentials) {
|
||||
const headers = { "Content-Type": "application/json" };
|
||||
|
||||
if (isGeminiProvider(provider)) {
|
||||
// Gemini API uses API key as query param — no Authorization header needed
|
||||
return headers;
|
||||
}
|
||||
|
||||
switch (provider) {
|
||||
case "openai":
|
||||
case "openrouter":
|
||||
headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`;
|
||||
if (provider === "openrouter") {
|
||||
headers["HTTP-Referer"] = "https://endpoint-proxy.local";
|
||||
headers["X-Title"] = "Endpoint Proxy";
|
||||
}
|
||||
break;
|
||||
default:
|
||||
headers["Authorization"] = `Bearer ${credentials.apiKey || credentials.accessToken}`;
|
||||
}
|
||||
|
||||
return headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize the embeddings response to OpenAI format.
|
||||
*
|
||||
* Gemini single response:
|
||||
* { embedding: { values: [0.1, 0.2, ...] } }
|
||||
*
|
||||
* Gemini batch response:
|
||||
* { embeddings: [{ values: [...] }, ...] }
|
||||
*
|
||||
* Target OpenAI format:
|
||||
* { object: "list", data: [{ object: "embedding", index: 0, embedding: [...] }], model, usage: {...} }
|
||||
*/
|
||||
function normalizeEmbeddingsResponse(responseBody, model, provider) {
|
||||
// Already in OpenAI format
|
||||
if (responseBody.object === "list" && Array.isArray(responseBody.data)) {
|
||||
return responseBody;
|
||||
}
|
||||
|
||||
if (isGeminiProvider(provider)) {
|
||||
let embeddingItems = [];
|
||||
|
||||
if (Array.isArray(responseBody.embeddings)) {
|
||||
// Batch response
|
||||
embeddingItems = responseBody.embeddings.map((emb, idx) => ({
|
||||
object: "embedding",
|
||||
index: idx,
|
||||
embedding: emb.values || []
|
||||
}));
|
||||
} else if (responseBody.embedding?.values) {
|
||||
// Single response
|
||||
embeddingItems = [{
|
||||
object: "embedding",
|
||||
index: 0,
|
||||
embedding: responseBody.embedding.values
|
||||
}];
|
||||
}
|
||||
|
||||
return {
|
||||
object: "list",
|
||||
data: embeddingItems,
|
||||
model,
|
||||
usage: {
|
||||
prompt_tokens: 0,
|
||||
total_tokens: 0
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Try to handle alternate formats gracefully
|
||||
return responseBody;
|
||||
}
|
||||
|
||||
/**
|
||||
* Core embeddings handler — shared between Worker and SSE server.
|
||||
*
|
||||
* @param {object} options
|
||||
* @param {object} options.body - Parsed request body { model, input, encoding_format }
|
||||
* @param {object} options.modelInfo - { provider, model }
|
||||
* @param {object} options.credentials - Provider credentials
|
||||
* @param {object} [options.log] - Logger
|
||||
* @param {function} [options.onCredentialsRefreshed] - Called when creds are refreshed
|
||||
* @param {function} [options.onRequestSuccess] - Called on success (clear error state)
|
||||
* @returns {Promise<{ success: boolean, response: Response, status?: number, error?: string }>}
|
||||
*/
|
||||
export async function handleEmbeddingsCore({
|
||||
@@ -199,7 +16,7 @@ export async function handleEmbeddingsCore({
|
||||
credentials,
|
||||
log,
|
||||
onCredentialsRefreshed,
|
||||
onRequestSuccess
|
||||
onRequestSuccess,
|
||||
}) {
|
||||
const { provider, model } = modelInfo;
|
||||
|
||||
@@ -212,19 +29,22 @@ export async function handleEmbeddingsCore({
|
||||
return createErrorResult(HTTP_STATUS.BAD_REQUEST, "input must be a string or array of strings");
|
||||
}
|
||||
|
||||
const encodingFormat = body.encoding_format || "float";
|
||||
|
||||
// Determine embeddings URL
|
||||
const url = buildEmbeddingsUrl(provider, model, credentials, input);
|
||||
if (!url) {
|
||||
const adapter = getEmbeddingAdapter(provider);
|
||||
if (!adapter) {
|
||||
return createErrorResult(
|
||||
HTTP_STATUS.BAD_REQUEST,
|
||||
`Provider '${provider}' does not support embeddings. Use openai, openrouter, gemini, or an openai-compatible provider.`
|
||||
`Provider '${provider}' does not support embeddings.`
|
||||
);
|
||||
}
|
||||
|
||||
const headers = buildEmbeddingsHeaders(provider, credentials);
|
||||
const requestBody = buildEmbeddingsBody(provider, model, input, encodingFormat, body.dimensions);
|
||||
const ctx = { input };
|
||||
const url = adapter.buildUrl(model, credentials, ctx);
|
||||
const headers = adapter.buildHeaders(credentials, ctx);
|
||||
const requestBody = adapter.buildBody(model, {
|
||||
input,
|
||||
encoding_format: body.encoding_format || "float",
|
||||
dimensions: body.dimensions,
|
||||
});
|
||||
|
||||
log?.debug?.("EMBEDDINGS", `${provider.toUpperCase()} | ${model} | input_type=${Array.isArray(input) ? `array[${input.length}]` : "string"}`);
|
||||
|
||||
@@ -233,7 +53,7 @@ export async function handleEmbeddingsCore({
|
||||
providerResponse = await fetch(url, {
|
||||
method: "POST",
|
||||
headers,
|
||||
body: JSON.stringify(requestBody)
|
||||
body: JSON.stringify(requestBody),
|
||||
});
|
||||
} catch (error) {
|
||||
const errMsg = formatProviderError(error, provider, model, HTTP_STATUS.BAD_GATEWAY);
|
||||
@@ -244,9 +64,9 @@ export async function handleEmbeddingsCore({
|
||||
// Handle 401/403 — try token refresh (skip for noAuth providers)
|
||||
const executor = getExecutor(provider);
|
||||
if (
|
||||
!executor.noAuth &&
|
||||
!executor?.noAuth &&
|
||||
(providerResponse.status === HTTP_STATUS.UNAUTHORIZED ||
|
||||
providerResponse.status === HTTP_STATUS.FORBIDDEN)
|
||||
providerResponse.status === HTTP_STATUS.FORBIDDEN)
|
||||
) {
|
||||
const newCredentials = await refreshWithRetry(
|
||||
() => executor.refreshCredentials(credentials, log),
|
||||
@@ -257,24 +77,17 @@ export async function handleEmbeddingsCore({
|
||||
if (newCredentials?.accessToken || newCredentials?.apiKey) {
|
||||
log?.info?.("TOKEN", `${provider.toUpperCase()} | refreshed for embeddings`);
|
||||
Object.assign(credentials, newCredentials);
|
||||
if (onCredentialsRefreshed && newCredentials) {
|
||||
await onCredentialsRefreshed(newCredentials);
|
||||
}
|
||||
if (onCredentialsRefreshed) await onCredentialsRefreshed(newCredentials);
|
||||
|
||||
// Retry with refreshed credentials
|
||||
try {
|
||||
const retryHeaders = buildEmbeddingsHeaders(provider, credentials);
|
||||
// Rebuild URL for Gemini since API key is embedded in query param
|
||||
const retryUrl = isGeminiProvider(provider)
|
||||
? buildEmbeddingsUrl(provider, model, credentials, input)
|
||||
: url;
|
||||
|
||||
const retryHeaders = adapter.buildHeaders(credentials, ctx);
|
||||
const retryUrl = adapter.buildUrl(model, credentials, ctx);
|
||||
providerResponse = await fetch(retryUrl, {
|
||||
method: "POST",
|
||||
headers: retryHeaders,
|
||||
body: JSON.stringify(requestBody)
|
||||
body: JSON.stringify(requestBody),
|
||||
});
|
||||
} catch (retryError) {
|
||||
} catch {
|
||||
log?.warn?.("TOKEN", `${provider.toUpperCase()} | retry after refresh failed`);
|
||||
}
|
||||
} else {
|
||||
@@ -292,16 +105,13 @@ export async function handleEmbeddingsCore({
|
||||
let responseBody;
|
||||
try {
|
||||
responseBody = await providerResponse.json();
|
||||
} catch (parseError) {
|
||||
} catch {
|
||||
return createErrorResult(HTTP_STATUS.BAD_GATEWAY, `Invalid JSON response from ${provider}`);
|
||||
}
|
||||
|
||||
if (onRequestSuccess) {
|
||||
await onRequestSuccess();
|
||||
}
|
||||
|
||||
const normalized = normalizeEmbeddingsResponse(responseBody, model, provider);
|
||||
if (onRequestSuccess) await onRequestSuccess();
|
||||
|
||||
const normalized = adapter.normalize(responseBody, model);
|
||||
log?.debug?.("EMBEDDINGS", `Success | usage=${JSON.stringify(normalized.usage || {})}`);
|
||||
|
||||
return {
|
||||
@@ -309,8 +119,8 @@ export async function handleEmbeddingsCore({
|
||||
response: new Response(JSON.stringify(normalized), {
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Access-Control-Allow-Origin": "*"
|
||||
}
|
||||
})
|
||||
"Access-Control-Allow-Origin": "*",
|
||||
},
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,406 +1,23 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { createErrorResult, parseUpstreamError, formatProviderError } from "../utils/error.js";
|
||||
import { HTTP_STATUS } from "../config/runtimeConfig.js";
|
||||
import { refreshWithRetry } from "../services/tokenRefresh.js";
|
||||
import { getExecutor } from "../executors/index.js";
|
||||
|
||||
// ChatGPT backend endpoint used by the Codex image pipeline.
const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses";
// Client-identification headers the Codex backend expects.
const CODEX_USER_AGENT = "codex-imagen/0.2.6";
const CODEX_VERSION = "0.122.0";
const CODEX_ORIGINATOR = "codex_cli_rs";
// "<model>-image" aliases map onto the underlying chat model.
const CODEX_MODEL_SUFFIX = "-image";
// Default detail level for reference images.
const CODEX_REF_DETAIL = "high";
|
||||
|
||||
// Image provider configurations
// provider id → { baseUrl, format[, stream] }. "format" selects the
// request/response shaping logic; "stream" marks SSE-based providers.
const IMAGE_PROVIDERS = {
  openai: { baseUrl: "https://api.openai.com/v1/images/generations", format: "openai" },
  gemini: { baseUrl: "https://generativelanguage.googleapis.com/v1beta/models", format: "gemini" },
  minimax: { baseUrl: "https://api.minimaxi.com/v1/images/generations", format: "openai" },
  openrouter: { baseUrl: "https://openrouter.ai/api/v1/images/generations", format: "openai" },
  nanobanana: { baseUrl: "https://api.nanobananaapi.ai/api/v1/nanobanana/generate", format: "nanobanana" },
  sdwebui: { baseUrl: "http://localhost:7860/sdapi/v1/txt2img", format: "sdwebui" },
  comfyui: { baseUrl: "http://localhost:8188", format: "comfyui" },
  huggingface: { baseUrl: "https://api-inference.huggingface.co/models", format: "huggingface" },
  codex: { baseUrl: CODEX_RESPONSES_URL, format: "codex", stream: true },
};
|
||||
|
||||
// Decode codex chatgpt account id from idToken if not stored
/**
 * Extract the ChatGPT account id from a Codex JWT id token.
 * Decodes the payload segment (base64url) and reads the
 * "https://api.openai.com/auth" claim; returns null on any failure.
 * @param {string} idToken
 * @returns {string|null}
 */
function decodeCodexAccountId(idToken) {
  try {
    const segments = String(idToken || "").split(".");
    if (segments.length !== 3) return null;
    // base64url → base64, then pad to a multiple of 4
    let b64 = segments[1].replace(/-/g, "+").replace(/_/g, "/");
    while (b64.length % 4 !== 0) b64 += "=";
    const claims = JSON.parse(Buffer.from(b64, "base64").toString("utf8"));
    return claims?.["https://api.openai.com/auth"]?.chatgpt_account_id || null;
  } catch {
    return null;
  }
}
|
||||
|
||||
// Strip "-image" suffix to get the underlying chat model
|
||||
function stripCodexImageModel(model) {
|
||||
return model.endsWith(CODEX_MODEL_SUFFIX)
|
||||
? model.slice(0, -CODEX_MODEL_SUFFIX.length)
|
||||
: model;
|
||||
}
|
||||
|
||||
// Normalize a single ref image input to a data URL
/**
 * Coerce a reference-image input into something the Codex API accepts.
 * Data URLs and http(s) URLs pass through untouched; any other non-empty
 * string is assumed to be raw base64 PNG and wrapped accordingly.
 * Non-strings and empty strings yield null.
 * @param {*} input
 * @returns {string|null}
 */
function toCodexDataUrl(input) {
  if (typeof input !== "string" || input === "") return null;
  const passThrough = /^data:image\//i.test(input) || /^https?:\/\//i.test(input);
  return passThrough ? input : `data:image/png;base64,${input}`;
}
|
||||
|
||||
// Build content array with optional reference images, mirroring codex-imagen tagging
/**
 * Assemble the Codex "input content" array: each reference image is wrapped
 * in <image name=imageN>…</image> text markers, then the prompt text follows.
 * @param {string} prompt - user prompt text (always the final entry)
 * @param {string[]} refs - data/http URLs of reference images
 * @param {string} [detail] - image detail level passed to the API
 * @returns {object[]} content entries in Codex wire format
 */
function buildCodexContent(prompt, refs, detail = CODEX_REF_DETAIL) {
  const tagged = refs.flatMap((url, index) => [
    { type: "input_text", text: `<image name=image${index + 1}>` },
    { type: "input_image", image_url: url, detail },
    { type: "input_text", text: "</image>" },
  ]);
  return [...tagged, { type: "input_text", text: prompt }];
}
|
||||
|
||||
// Parse Codex SSE stream, log progress, return final base64 image.
// Optional callbacks let caller forward events to client (SSE pipe).
/**
 * Consume the Codex responses SSE stream and return the final image.
 * @param {Response} response - upstream fetch Response with a readable body
 * @param {object} [log] - optional logger with info()
 * @param {{onProgress?: Function, onPartialImage?: Function}} [callbacks]
 * @returns {Promise<string|null>} base64 image data, or null if none arrived
 */
async function parseCodexImageStream(response, log, callbacks = {}) {
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let pending = "";
  let finalImage = null;
  let previousEvent = null;
  let totalBytes = 0;
  let lastProgressAt = 0;

  // Process one SSE event block ("event:" / "data:" lines).
  const handleBlock = (block) => {
    let eventName = null;
    let dataStr = "";
    for (const line of block.split("\n")) {
      if (line.startsWith("event:")) {
        eventName = line.slice(6).trim();
      } else if (line.startsWith("data:")) {
        // NOTE(review): multi-line data segments are concatenated without a
        // separator, matching the original behavior — confirm Codex never
        // splits JSON payloads across data lines.
        dataStr += line.slice(5).trim();
      }
    }
    if (!eventName) return;

    // Log each distinct stage once instead of once per repeated event.
    if (eventName !== previousEvent) {
      log?.info?.("IMAGE", `codex progress: ${eventName}`);
      previousEvent = eventName;
    }

    // Forward progress, throttled to roughly 5 callbacks/second.
    const now = Date.now();
    if (callbacks.onProgress && now - lastProgressAt > 200) {
      lastProgressAt = now;
      callbacks.onProgress({ stage: eventName, bytesReceived: totalBytes });
    }

    if (eventName === "response.image_generation_call.partial_image" && dataStr) {
      try {
        const data = JSON.parse(dataStr);
        if (callbacks.onPartialImage && data?.partial_image_b64) {
          callbacks.onPartialImage({ b64_json: data.partial_image_b64, index: data.partial_image_index });
        }
      } catch {}
    }

    if (eventName === "response.output_item.done" && dataStr) {
      try {
        const item = JSON.parse(dataStr)?.item;
        if (item?.type === "image_generation_call" && item.result) {
          finalImage = item.result;
        }
      } catch {}
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    totalBytes += value?.byteLength || 0;
    pending += decoder.decode(value, { stream: true });

    // SSE events are separated by a blank line.
    let boundary;
    while ((boundary = pending.indexOf("\n\n")) !== -1) {
      handleBlock(pending.slice(0, boundary));
      pending = pending.slice(boundary + 2);
    }
  }
  return finalImage;
}
|
||||
|
||||
// Build SSE Response that pipes codex progress + partial + done events to client
/**
 * Wrap the upstream Codex stream in an SSE Response for the client.
 * Emits "progress" / "partial_image" events while parsing, then either
 * "done" with the final image payload or "error" with a message.
 * @param {Response} providerResponse - upstream Codex response
 * @param {object} [log] - optional logger
 * @param {Function} [onSuccess] - awaited once a final image was produced
 * @returns {Response} text/event-stream response
 */
function buildCodexSseResponse(providerResponse, log, onSuccess) {
  const stream = new ReadableStream({
    async start(controller) {
      const encoder = new TextEncoder();
      const emit = (event, payload) => {
        const frame = `event: ${event}\ndata: ${JSON.stringify(payload)}\n\n`;
        controller.enqueue(encoder.encode(frame));
      };
      try {
        const b64 = await parseCodexImageStream(providerResponse, log, {
          onProgress: (info) => emit("progress", info),
          onPartialImage: (info) => emit("partial_image", info),
        });
        if (b64) {
          if (onSuccess) await onSuccess();
          emit("done", {
            created: Math.floor(Date.now() / 1000),
            data: [{ b64_json: b64 }],
          });
        } else {
          emit("error", { message: "Codex did not return an image. Account may not be entitled (Plus/Pro required)." });
        }
      } catch (err) {
        emit("error", { message: err?.message || "Stream failed" });
      } finally {
        // Always close so the client's EventSource terminates cleanly.
        controller.close();
      }
    },
  });
  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
      "Access-Control-Allow-Origin": "*",
    },
  });
}
|
||||
import { getImageAdapter } from "./imageProviders/index.js";
|
||||
import { urlToBase64 } from "./imageProviders/_base.js";
|
||||
|
||||
/**
 * Build the image-generation endpoint URL for a provider.
 * Returns null when the provider has no IMAGE_PROVIDERS entry.
 * @param {string} provider
 * @param {string} model
 * @param {object} credentials - used by Gemini (API key goes in the query string)
 * @returns {string|null}
 */
function buildImageUrl(provider, model, credentials) {
  const config = IMAGE_PROVIDERS[provider];
  if (!config) return null;

  if (provider === "gemini") {
    // Gemini puts the key in the query string and wants a bare model id.
    const apiKey = credentials?.apiKey || credentials?.accessToken;
    const modelId = model.replace(/^models\//, "");
    return `${config.baseUrl}/${modelId}:generateContent?key=${encodeURIComponent(apiKey)}`;
  }
  if (provider === "huggingface") return `${config.baseUrl}/${model}`;
  if (provider === "codex") return CODEX_RESPONSES_URL;
  return config.baseUrl;
}
|
||||
|
||||
/**
 * Build request headers for the image-generation call.
 * Gemini needs no auth header (key lives in the URL); Codex uses the full
 * ChatGPT backend header set; everyone else gets a Bearer token plus,
 * for OpenRouter, attribution headers.
 * @param {string} provider
 * @param {object} credentials
 * @returns {object} header map
 */
function buildImageHeaders(provider, credentials) {
  const bearer = credentials?.apiKey || credentials?.accessToken;

  if (provider === "gemini") {
    return { "Content-Type": "application/json" };
  }

  if (provider === "codex") {
    // Account id may be stored, or recovered from the JWT id token.
    const accountId =
      credentials?.providerSpecificData?.chatgptAccountId ||
      decodeCodexAccountId(credentials?.idToken);
    return {
      "accept": "text/event-stream, application/json",
      "authorization": `Bearer ${credentials?.accessToken || ""}`,
      "chatgpt-account-id": accountId || "",
      "content-type": "application/json",
      "originator": CODEX_ORIGINATOR,
      "session_id": randomUUID(),
      "user-agent": CODEX_USER_AGENT,
      "version": CODEX_VERSION,
      "x-client-request-id": randomUUID(),
    };
  }

  const headers = { "Content-Type": "application/json" };
  if (provider === "openrouter") {
    headers["Authorization"] = `Bearer ${bearer}`;
    headers["HTTP-Referer"] = "https://endpoint-proxy.local";
    headers["X-Title"] = "Endpoint Proxy";
    return headers;
  }
  if (provider === "huggingface") {
    headers["Authorization"] = `Bearer ${bearer}`;
    return headers;
  }
  // Default: attach a Bearer token only when credentials exist.
  if (bearer) headers["Authorization"] = `Bearer ${bearer}`;
  return headers;
}
|
||||
|
||||
/**
 * Build the image-generation request body in the provider's native format.
 * @param {string} provider
 * @param {string} model
 * @param {object} body - client request { prompt, n, size, quality, style,
 *   response_format, image, images, ... }
 * @returns {object} provider-specific request body
 */
function buildImageBody(provider, model, body) {
  const { prompt, n = 1, size = "1024x1024", quality, style, response_format, image, images } = body;

  if (provider === "codex") {
    // Collect reference images (array + single), normalized to data URLs.
    const refs = [];
    if (Array.isArray(images)) {
      for (const candidate of images) {
        const url = toCodexDataUrl(candidate);
        if (url) refs.push(url);
      }
    }
    const single = toCodexDataUrl(image);
    if (single) refs.push(single);

    const detail = body.image_detail || CODEX_REF_DETAIL;
    const imgTool = { type: "image_generation", output_format: (body.output_format || "png").toLowerCase() };
    if (body.size && body.size !== "") imgTool.size = body.size;
    if (body.quality && body.quality !== "") imgTool.quality = body.quality;
    if (body.background && body.background !== "") imgTool.background = body.background;

    return {
      model: stripCodexImageModel(model),
      instructions: "",
      input: [{ type: "message", role: "user", content: buildCodexContent(prompt, refs, detail) }],
      tools: [imgTool],
      tool_choice: "auto",
      parallel_tool_calls: false,
      prompt_cache_key: randomUUID(),
      stream: true,
      store: false,
      reasoning: null,
    };
  }

  if (provider === "gemini") {
    return {
      contents: [{ parts: [{ text: prompt }] }],
      generationConfig: {
        responseModalities: ["TEXT", "IMAGE"],
      },
    };
  }

  if (provider === "sdwebui") {
    // "WIDTHxHEIGHT" → numeric dimensions, falling back to 512 on bad input.
    const [width, height] = size.split("x").map(Number);
    return {
      prompt,
      width: width || 512,
      height: height || 512,
      steps: 20,
      batch_size: n,
    };
  }

  if (provider === "nanobanana") {
    const sizeMap = {
      "1024x1024": "1:1",
      "1024x1792": "9:16",
      "1792x1024": "16:9",
    };
    return {
      prompt,
      // NOTE(review): "TEXTTOIAMGE" looks misspelled but is kept verbatim —
      // confirm against the nanobanana API before changing it.
      type: "TEXTTOIAMGE",
      numImages: n,
      image_size: sizeMap[size] || "1:1",
    };
  }

  // OpenAI-compatible default format
  const requestBody = { model, prompt, n, size };
  if (quality) requestBody.quality = quality;
  if (style) requestBody.style = style;
  if (response_format) requestBody.response_format = response_format;
  return requestBody;
}
|
||||
|
||||
/**
 * Normalize a provider response into the OpenAI images shape
 * ({ created, data: [{ b64_json | url, revised_prompt? }] }).
 *
 * @param {object} responseBody - Raw parsed provider response
 * @param {string} provider - Provider id the response came from
 * @param {string} prompt - Original prompt, echoed as revised_prompt where useful
 * @returns {object} OpenAI-shaped response, or the raw body for pass-through providers
 */
function normalizeImageResponse(responseBody, provider, prompt) {
  // Responses that already carry { created, data: [...] } pass through untouched.
  if (responseBody.created && Array.isArray(responseBody.data)) {
    return responseBody;
  }

  const createdAt = Math.floor(Date.now() / 1000);

  if (provider === "gemini") {
    // Images arrive as inlineData parts on the first candidate.
    const candidateParts = responseBody.candidates?.[0]?.content?.parts || [];
    const collected = [];
    for (const part of candidateParts) {
      if (part.inlineData?.data) collected.push({ b64_json: part.inlineData.data });
    }
    return {
      created: createdAt,
      // No image parts: emit one empty entry carrying the prompt.
      data: collected.length > 0 ? collected : [{ b64_json: "", revised_prompt: prompt }],
    };
  }

  if (provider === "sdwebui") {
    // SD WebUI returns an array of base64 strings under "images".
    const rawImages = Array.isArray(responseBody.images) ? responseBody.images : [];
    return { created: createdAt, data: rawImages.map((b64) => ({ b64_json: b64 })) };
  }

  if (provider === "nanobanana") {
    if (responseBody.image) {
      return {
        created: createdAt,
        data: [{ b64_json: responseBody.image, revised_prompt: prompt }],
      };
    }
    return { created: createdAt, data: [] };
  }

  // huggingface (pre-converted binary) and unknown providers: return as-is.
  return responseBody;
}
|
||||
|
||||
/**
 * Core image generation handler — orchestrator only.
 * Provider-specific URL/headers/body/parse/normalize live in `./imageProviders/{id}.js`.
 *
 * @param {object} options
 * @param {object} options.body - Request body { model, prompt, n, size, ... }
 * @param {object} options.modelInfo - { provider, model }
 * @param {object} options.credentials - Provider credentials
 * @param {object} [options.log] - Logger
 * @param {function} [options.onCredentialsRefreshed] - Called when creds are refreshed
 * @param {function} [options.onRequestSuccess] - Called on success
 * @param {boolean} [options.streamToClient] - Pipe SSE to client (codex)
 * @param {boolean} [options.binaryOutput] - Return raw image bytes
 * @returns {Promise<{ success: boolean, response: Response, status?: number, error?: string }>}
 */
|
||||
export async function handleImageGenerationCore({
|
||||
@@ -419,16 +36,17 @@ export async function handleImageGenerationCore({
|
||||
return createErrorResult(HTTP_STATUS.BAD_REQUEST, "Missing required field: prompt");
|
||||
}
|
||||
|
||||
const url = buildImageUrl(provider, model, credentials);
|
||||
if (!url) {
|
||||
const adapter = getImageAdapter(provider);
|
||||
if (!adapter) {
|
||||
return createErrorResult(
|
||||
HTTP_STATUS.BAD_REQUEST,
|
||||
`Provider '${provider}' does not support image generation`
|
||||
);
|
||||
}
|
||||
|
||||
const headers = buildImageHeaders(provider, credentials);
|
||||
const requestBody = buildImageBody(provider, model, body);
|
||||
const url = adapter.buildUrl(model, credentials);
|
||||
const headers = adapter.buildHeaders(credentials);
|
||||
const requestBody = adapter.buildBody(model, body);
|
||||
|
||||
log?.debug?.("IMAGE", `${provider.toUpperCase()} | ${model} | prompt="${body.prompt.slice(0, 50)}..."`);
|
||||
|
||||
@@ -445,10 +63,11 @@ export async function handleImageGenerationCore({
|
||||
return createErrorResult(HTTP_STATUS.BAD_GATEWAY, errMsg);
|
||||
}
|
||||
|
||||
// Handle 401/403 — try token refresh
|
||||
// Handle 401/403 — try token refresh (skipped for noAuth providers)
|
||||
const executor = getExecutor(provider);
|
||||
if (
|
||||
!executor?.noAuth &&
|
||||
!adapter.noAuth &&
|
||||
(providerResponse.status === HTTP_STATUS.UNAUTHORIZED ||
|
||||
providerResponse.status === HTTP_STATUS.FORBIDDEN)
|
||||
) {
|
||||
@@ -461,20 +80,17 @@ export async function handleImageGenerationCore({
|
||||
if (newCredentials?.accessToken || newCredentials?.apiKey) {
|
||||
log?.info?.("TOKEN", `${provider.toUpperCase()} | refreshed for image generation`);
|
||||
Object.assign(credentials, newCredentials);
|
||||
if (onCredentialsRefreshed && newCredentials) {
|
||||
await onCredentialsRefreshed(newCredentials);
|
||||
}
|
||||
if (onCredentialsRefreshed) await onCredentialsRefreshed(newCredentials);
|
||||
|
||||
try {
|
||||
const retryHeaders = buildImageHeaders(provider, credentials);
|
||||
const retryUrl = provider === "gemini" ? buildImageUrl(provider, model, credentials) : url;
|
||||
|
||||
const retryHeaders = adapter.buildHeaders(credentials);
|
||||
const retryUrl = adapter.buildUrl(model, credentials);
|
||||
providerResponse = await fetch(retryUrl, {
|
||||
method: "POST",
|
||||
headers: retryHeaders,
|
||||
body: JSON.stringify(requestBody),
|
||||
});
|
||||
} catch (retryError) {
|
||||
} catch {
|
||||
log?.warn?.("TOKEN", `${provider.toUpperCase()} | retry after refresh failed`);
|
||||
}
|
||||
} else {
|
||||
@@ -489,51 +105,42 @@ export async function handleImageGenerationCore({
|
||||
return createErrorResult(statusCode, errMsg);
|
||||
}
|
||||
|
||||
let responseBody;
|
||||
// Parse provider response — adapter may override (codex SSE / async polling / binary)
|
||||
let parsed;
|
||||
try {
|
||||
if (provider === "huggingface") {
|
||||
const buffer = await providerResponse.arrayBuffer();
|
||||
const base64 = Buffer.from(buffer).toString("base64");
|
||||
responseBody = {
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
data: [{ b64_json: base64 }],
|
||||
};
|
||||
} else if (provider === "codex") {
|
||||
// SSE pipe to client (progress + partial_image + done)
|
||||
if (streamToClient) {
|
||||
return {
|
||||
success: true,
|
||||
response: buildCodexSseResponse(providerResponse, log, onRequestSuccess),
|
||||
};
|
||||
if (adapter.parseResponse) {
|
||||
parsed = await adapter.parseResponse(providerResponse, {
|
||||
headers,
|
||||
log,
|
||||
streamToClient,
|
||||
onRequestSuccess,
|
||||
});
|
||||
// Codex streaming case: returns an SSE Response directly
|
||||
if (parsed?.sseResponse) {
|
||||
return { success: true, response: parsed.sseResponse };
|
||||
}
|
||||
const b64 = await parseCodexImageStream(providerResponse, log);
|
||||
if (!b64) {
|
||||
return createErrorResult(
|
||||
HTTP_STATUS.BAD_GATEWAY,
|
||||
"Codex did not return an image. Account may not be entitled (Plus/Pro required)."
|
||||
);
|
||||
}
|
||||
responseBody = {
|
||||
created: Math.floor(Date.now() / 1000),
|
||||
data: [{ b64_json: b64 }],
|
||||
};
|
||||
} else {
|
||||
responseBody = await providerResponse.json();
|
||||
parsed = await providerResponse.json();
|
||||
}
|
||||
} catch (parseError) {
|
||||
return createErrorResult(HTTP_STATUS.BAD_GATEWAY, `Invalid response from ${provider}`);
|
||||
return createErrorResult(HTTP_STATUS.BAD_GATEWAY, parseError.message || `Invalid response from ${provider}`);
|
||||
}
|
||||
|
||||
if (onRequestSuccess) {
|
||||
await onRequestSuccess();
|
||||
}
|
||||
if (onRequestSuccess) await onRequestSuccess();
|
||||
|
||||
const normalized = normalizeImageResponse(responseBody, provider, body.prompt);
|
||||
// Normalize → OpenAI-compatible shape
|
||||
const normalized = adapter.normalize(parsed, body.prompt);
|
||||
|
||||
// Binary output: decode first b64_json into raw bytes
|
||||
// Already in OpenAI shape? skip re-normalize
|
||||
const finalBody = (normalized.created && Array.isArray(normalized.data)) ? normalized : parsed;
|
||||
|
||||
// Binary output: decode first b64_json (or fetch url) into raw bytes
|
||||
if (binaryOutput) {
|
||||
const first = normalized.data?.[0];
|
||||
const b64 = first?.b64_json;
|
||||
const first = finalBody.data?.[0];
|
||||
let b64 = first?.b64_json;
|
||||
if (!b64 && first?.url) {
|
||||
try { b64 = await urlToBase64(first.url); } catch {}
|
||||
}
|
||||
if (b64) {
|
||||
const buf = Buffer.from(b64, "base64");
|
||||
const fmt = (body.output_format || "png").toLowerCase();
|
||||
@@ -553,7 +160,7 @@ export async function handleImageGenerationCore({
|
||||
|
||||
return {
|
||||
success: true,
|
||||
response: new Response(JSON.stringify(normalized), {
|
||||
response: new Response(JSON.stringify(finalBody), {
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"Access-Control-Allow-Origin": "*",
|
||||
|
||||
31
open-sse/handlers/imageProviders/_base.js
Normal file
31
open-sse/handlers/imageProviders/_base.js
Normal file
@@ -0,0 +1,31 @@
|
||||
// Shared helpers for image provider adapters

// Cadence and upper bound for async-provider polling loops.
export const POLL_INTERVAL_MS = 1500;
export const POLL_TIMEOUT_MS = 120000;

// Promise-based delay, used between poll attempts.
export function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
|
||||
// Translate an OpenAI "WxH" size string into the aspect-ratio string
// ("1:1", "9:16", ...) several providers expect. Unknown or malformed
// sizes fall back to square.
export function sizeToAspectRatio(size) {
  if (typeof size !== "string" || size.length === 0) return "1:1";
  const ratioBySize = {
    "1024x1024": "1:1",
    "1024x1792": "9:16",
    "1792x1024": "16:9",
    "1024x1536": "2:3",
    "1536x1024": "3:2",
  };
  return ratioBySize[size] ?? "1:1";
}
|
||||
|
||||
// Download an image URL and return its bytes as a base64 string.
// Used for providers whose responses carry hosted URLs instead of inline
// base64 payloads. Throws when the fetch does not return a 2xx response.
export async function urlToBase64(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch image: ${response.status}`);
  }
  const bytes = await response.arrayBuffer();
  return Buffer.from(bytes).toString("base64");
}
|
||||
|
||||
// Current UNIX time in whole seconds (the OpenAI `created` field convention).
export function nowSec() {
  return Math.trunc(Date.now() / 1000);
}
|
||||
43
open-sse/handlers/imageProviders/blackForestLabs.js
Normal file
43
open-sse/handlers/imageProviders/blackForestLabs.js
Normal file
@@ -0,0 +1,43 @@
|
||||
// Black Forest Labs (FLUX) — async submit + polling_url
import { sleep, nowSec, POLL_INTERVAL_MS, POLL_TIMEOUT_MS } from "./_base.js";

const BASE_URL = "https://api.bfl.ai/v1";

// Adapter contract: buildUrl/buildHeaders/buildBody shape the submit
// request; parseResponse polls until the job finishes; normalize maps the
// final payload into the OpenAI images shape.
export default {
  // Submit-then-poll provider, not a synchronous endpoint.
  async: true,
  // The model id doubles as the endpoint path segment (e.g. /flux-pro-1.1).
  buildUrl: (model) => `${BASE_URL}/${model}`,
  buildHeaders: (creds) => {
    // BFL authenticates via the "x-key" header, not a Bearer token.
    const key = creds?.apiKey || creds?.accessToken;
    return { "Content-Type": "application/json", "x-key": key };
  },
  buildBody: (_model, body) => {
    const req = { prompt: body.prompt };
    if (body.size) {
      // Split an OpenAI "WxH" size into explicit pixel dimensions.
      const [w, h] = body.size.split("x").map(Number);
      if (w) req.width = w;
      if (h) req.height = h;
    }
    // Optional reference image for image-prompted generation.
    if (body.image) req.image_prompt = body.image;
    return req;
  },
  // Poll the polling_url from the submit response until the job reports
  // "Ready", a failure status, or POLL_TIMEOUT_MS elapses. Throws on any
  // failure so the orchestrator converts it into an error response.
  async parseResponse(response, { headers }) {
    const data = await response.json();
    const pollingUrl = data.polling_url;
    if (!pollingUrl) throw new Error("BFL: no polling_url returned");
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    while (Date.now() < deadline) {
      await sleep(POLL_INTERVAL_MS);
      // Only the auth header is re-sent; the polling endpoint takes no body.
      const r = await fetch(pollingUrl, { headers: { "x-key": headers["x-key"], "Accept": "application/json" } });
      if (!r.ok) throw new Error(`BFL status ${r.status}`);
      const s = await r.json();
      if (s.status === "Ready") return s;
      if (s.status === "Error" || s.status === "Failed") throw new Error(s.error || "BFL generation failed");
    }
    throw new Error("BFL polling timeout");
  },
  // result.sample holds the hosted image URL of the finished job.
  normalize: (responseBody) => {
    const sample = responseBody.result?.sample;
    if (sample) return { created: nowSec(), data: [{ url: sample }] };
    return { created: nowSec(), data: [] };
  },
};
|
||||
198
open-sse/handlers/imageProviders/codex.js
Normal file
198
open-sse/handlers/imageProviders/codex.js
Normal file
@@ -0,0 +1,198 @@
|
||||
// Codex (ChatGPT Plus/Pro) image generation via Responses API + SSE
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { nowSec } from "./_base.js";
|
||||
|
||||
// Fixed endpoint and client-identification constants for the Codex
// (ChatGPT) Responses API.
const CODEX_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses";
const CODEX_USER_AGENT = "codex-imagen/0.2.6";
const CODEX_VERSION = "0.122.0";
const CODEX_ORIGINATOR = "codex_cli_rs";
const CODEX_MODEL_SUFFIX = "-image";
const CODEX_REF_DETAIL = "high";

// Extract the ChatGPT account id from a JWT id token's payload claims.
// Returns null for anything that is not a well-formed three-segment JWT.
function decodeAccountId(idToken) {
  try {
    const segments = String(idToken || "").split(".");
    if (segments.length !== 3) return null;
    // base64url → base64, re-padding to a multiple of 4.
    const base64 = segments[1].replace(/-/g, "+").replace(/_/g, "/");
    const padding = "=".repeat((4 - (base64.length % 4)) % 4);
    const claims = JSON.parse(Buffer.from(base64 + padding, "base64").toString("utf8"));
    return claims?.["https://api.openai.com/auth"]?.chatgpt_account_id || null;
  } catch {
    return null;
  }
}

// Drop the routing-only "-image" suffix from a model id, if present.
function stripImageSuffix(model) {
  if (!model.endsWith(CODEX_MODEL_SUFFIX)) return model;
  return model.slice(0, model.length - CODEX_MODEL_SUFFIX.length);
}

// Coerce a reference image into something the API accepts: pass data: and
// http(s) URLs through, wrap bare base64 as a PNG data URL, reject non-strings.
function toDataUrl(input) {
  if (typeof input !== "string" || input.length === 0) return null;
  const isAlreadyUrl = /^data:image\//i.test(input) || /^https?:\/\//i.test(input);
  return isAlreadyUrl ? input : `data:image/png;base64,${input}`;
}

// Build the user-message content array: each reference image is wrapped in
// <image name=imageN> ... </image> text markers, then the prompt text last.
function buildContent(prompt, refs, detail = CODEX_REF_DETAIL) {
  const content = [];
  let refNumber = 0;
  for (const url of refs) {
    refNumber += 1;
    content.push({ type: "input_text", text: `<image name=image${refNumber}>` });
    content.push({ type: "input_image", image_url: url, detail });
    content.push({ type: "input_text", text: "</image>" });
  }
  content.push({ type: "input_text", text: prompt });
  return content;
}
|
||||
|
||||
// Parse Codex SSE stream → final base64 image. Optional callbacks for client streaming.
// Consumes the entire event stream: invokes callbacks.onProgress (throttled
// to at most one call per ~200ms) and callbacks.onPartialImage as events
// arrive, and returns the final image's base64 string — or null when the
// stream ends without producing one.
async function parseStream(response, log, callbacks = {}) {
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";           // unconsumed stream text carried between chunks
  let imageB64 = null;       // final image payload, set by output_item.done
  let lastEvent = null;      // last event name, so stages are logged once
  let bytesReceived = 0;
  let lastProgressLogMs = 0; // throttle timestamp for onProgress

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    bytesReceived += value?.byteLength || 0;
    // stream:true keeps multi-byte characters split across chunks intact.
    buffer += decoder.decode(value, { stream: true });

    // SSE events are separated by a blank line ("\n\n").
    let sepIdx;
    while ((sepIdx = buffer.indexOf("\n\n")) !== -1) {
      const block = buffer.slice(0, sepIdx);
      buffer = buffer.slice(sepIdx + 2);

      // Pull the "event:" name and concatenated "data:" payload out of the block.
      const lines = block.split("\n");
      let eventName = null;
      let dataStr = "";
      for (const line of lines) {
        if (line.startsWith("event:")) eventName = line.slice(6).trim();
        else if (line.startsWith("data:")) dataStr += line.slice(5).trim();
      }
      if (!eventName) continue;
      // Log each distinct stage once rather than every event.
      if (eventName !== lastEvent) {
        log?.info?.("IMAGE", `codex progress: ${eventName}`);
        lastEvent = eventName;
      }

      const now = Date.now();
      if (callbacks.onProgress && now - lastProgressLogMs > 200) {
        lastProgressLogMs = now;
        callbacks.onProgress({ stage: eventName, bytesReceived });
      }

      // Intermediate preview frames, forwarded when client streaming is on.
      // Malformed JSON payloads are deliberately ignored.
      if (eventName === "response.image_generation_call.partial_image" && dataStr) {
        try {
          const data = JSON.parse(dataStr);
          if (callbacks.onPartialImage && data?.partial_image_b64) {
            callbacks.onPartialImage({ b64_json: data.partial_image_b64, index: data.partial_image_index });
          }
        } catch {}
      }

      // Final image: an image_generation_call item carrying base64 in .result.
      if (eventName === "response.output_item.done" && dataStr) {
        try {
          const data = JSON.parse(dataStr);
          const item = data?.item;
          if (item?.type === "image_generation_call" && item.result) {
            imageB64 = item.result;
          }
        } catch {}
      }
    }
  }
  return imageB64;
}
|
||||
|
||||
// SSE Response that pipes codex progress + partial + done events to client
// Wraps parseStream in a ReadableStream: re-emits "progress" and
// "partial_image" events as they arrive, then either "done" (with the final
// OpenAI-shaped payload) or "error". onSuccess fires only after a final
// image was actually produced.
function buildSseResponse(providerResponse, log, onSuccess) {
  const stream = new ReadableStream({
    async start(controller) {
      const enc = new TextEncoder();
      // Serialize a single SSE event frame onto the outgoing stream.
      const send = (event, data) => {
        controller.enqueue(enc.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`));
      };
      try {
        const b64 = await parseStream(providerResponse, log, {
          onProgress: (info) => send("progress", info),
          onPartialImage: (info) => send("partial_image", info),
        });
        if (!b64) {
          send("error", { message: "Codex did not return an image. Account may not be entitled (Plus/Pro required)." });
        } else {
          if (onSuccess) await onSuccess();
          send("done", { created: nowSec(), data: [{ b64_json: b64 }] });
        }
      } catch (err) {
        send("error", { message: err?.message || "Stream failed" });
      } finally {
        // Always close so the client's event stream terminates cleanly.
        controller.close();
      }
    },
  });
  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      // Disable reverse-proxy buffering so events flush immediately.
      "X-Accel-Buffering": "no",
      "Access-Control-Allow-Origin": "*",
    },
  });
}
|
||||
|
||||
export default {
  // Responses arrive as SSE; the orchestrator may pipe them to the client.
  stream: true,
  // Single fixed endpoint — the model id travels in the request body.
  buildUrl: () => CODEX_RESPONSES_URL,
  buildHeaders: (creds) => {
    // Prefer the stored account id; otherwise recover it from the id-token JWT.
    const accountId = creds?.providerSpecificData?.chatgptAccountId || decodeAccountId(creds?.idToken);
    return {
      "accept": "text/event-stream, application/json",
      "authorization": `Bearer ${creds?.accessToken || ""}`,
      "chatgpt-account-id": accountId || "",
      "content-type": "application/json",
      "originator": CODEX_ORIGINATOR,
      // Fresh ids per request so each call is traced independently.
      "session_id": randomUUID(),
      "user-agent": CODEX_USER_AGENT,
      "version": CODEX_VERSION,
      "x-client-request-id": randomUUID(),
    };
  },
  // Build a Responses-API payload carrying the image_generation tool.
  buildBody: (model, body) => {
    // Collect reference images (array form plus single form) as URLs.
    const refs = [];
    if (Array.isArray(body.images)) body.images.forEach((i) => { const u = toDataUrl(i); if (u) refs.push(u); });
    const single = toDataUrl(body.image);
    if (single) refs.push(single);
    const detail = body.image_detail || CODEX_REF_DETAIL;
    const imgTool = { type: "image_generation", output_format: (body.output_format || "png").toLowerCase() };
    // Forward optional tool settings only when the caller provided them.
    if (body.size && body.size !== "") imgTool.size = body.size;
    if (body.quality && body.quality !== "") imgTool.quality = body.quality;
    if (body.background && body.background !== "") imgTool.background = body.background;
    return {
      model: stripImageSuffix(model),
      instructions: "",
      input: [{ type: "message", role: "user", content: buildContent(body.prompt, refs, detail) }],
      tools: [imgTool],
      tool_choice: "auto",
      parallel_tool_calls: false,
      prompt_cache_key: randomUUID(),
      stream: true,
      store: false,
      reasoning: null,
    };
  },
  // Custom: codex parses SSE → either pipe to client or collect b64
  async parseResponse(response, { log, streamToClient, onRequestSuccess }) {
    if (streamToClient) {
      // Hand back an SSE Response; the orchestrator returns it as-is.
      return { sseResponse: buildSseResponse(response, log, onRequestSuccess) };
    }
    const b64 = await parseStream(response, log);
    if (!b64) {
      throw new Error("Codex did not return an image. Account may not be entitled (Plus/Pro required).");
    }
    return { created: nowSec(), data: [{ b64_json: b64 }] };
  },
  // parseResponse already emits the OpenAI shape; nothing to normalize.
  normalize: (responseBody) => responseBody,
};
|
||||
8
open-sse/handlers/imageProviders/comfyui.js
Normal file
8
open-sse/handlers/imageProviders/comfyui.js
Normal file
@@ -0,0 +1,8 @@
|
||||
// ComfyUI adapter — targets a local instance with no credentials.
// Placeholder only: full ComfyUI graph/workflow submission is not implemented.
const comfyuiAdapter = {
  // Local instances are unauthenticated, so the token-refresh path is skipped.
  noAuth: true,
  buildUrl() {
    return "http://localhost:8188";
  },
  buildHeaders() {
    return { "Content-Type": "application/json" };
  },
  buildBody(_model, body) {
    return { prompt: body.prompt };
  },
  // Responses pass through untouched.
  normalize(responseBody) {
    return responseBody;
  },
};

export default comfyuiAdapter;
|
||||
41
open-sse/handlers/imageProviders/falAi.js
Normal file
41
open-sse/handlers/imageProviders/falAi.js
Normal file
@@ -0,0 +1,41 @@
|
||||
// Fal.ai — async submit + queue polling
import { sleep, nowSec, sizeToAspectRatio, POLL_INTERVAL_MS, POLL_TIMEOUT_MS } from "./_base.js";

const BASE_URL = "https://queue.fal.run";

export default {
  // Submit-then-poll provider (queue API), not a synchronous endpoint.
  async: true,
  // Model ids already contain the full queue path (e.g. "fal-ai/flux/dev").
  buildUrl: (model) => `${BASE_URL}/${model}`,
  buildHeaders: (creds) => {
    // Fal uses the "Key ..." authorization scheme, not "Bearer ...".
    const key = creds?.apiKey || creds?.accessToken;
    return { "Content-Type": "application/json", "Authorization": `Key ${key}` };
  },
  buildBody: (_model, body) => {
    const req = { prompt: body.prompt, num_images: body.n || 1 };
    // NOTE(review): image_size is sent as an aspect-ratio string ("1:1",
    // "9:16", ...) — confirm the targeted fal models accept this form.
    if (body.size) req.image_size = sizeToAspectRatio(body.size);
    // Optional reference image for image-to-image models.
    if (body.image) req.image_url = body.image;
    return req;
  },
  // Poll status_url until COMPLETED, then fetch the final payload from
  // response_url. Throws on FAILED or when POLL_TIMEOUT_MS elapses.
  async parseResponse(response, { headers }) {
    const { status_url, response_url } = await response.json();
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    while (Date.now() < deadline) {
      await sleep(POLL_INTERVAL_MS);
      const r = await fetch(status_url, { headers });
      if (!r.ok) throw new Error(`Fal status ${r.status}`);
      const s = await r.json();
      if (s.status === "COMPLETED") {
        const fr = await fetch(response_url, { headers });
        return await fr.json();
      }
      if (s.status === "FAILED") throw new Error(s.error || "Fal generation failed");
    }
    throw new Error("Fal polling timeout");
  },
  // Results may be a list of image objects ({url}) or bare URL strings, or
  // a single image; map each entry to an OpenAI { url } item.
  normalize: (responseBody) => {
    const images = Array.isArray(responseBody.images)
      ? responseBody.images
      : (responseBody.image ? [responseBody.image] : []);
    return { created: nowSec(), data: images.map((img) => ({ url: img.url || img })) };
  },
};
|
||||
25
open-sse/handlers/imageProviders/gemini.js
Normal file
25
open-sse/handlers/imageProviders/gemini.js
Normal file
@@ -0,0 +1,25 @@
|
||||
// Google Gemini adapter (Nano Banana models)
import { nowSec } from "./_base.js";

const BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";

export default {
  // Gemini authenticates via a ?key= query parameter rather than a header.
  buildUrl: (model, creds) => {
    const apiKey = creds?.apiKey || creds?.accessToken;
    // Accept both "models/<id>" and bare "<id>" model names.
    const modelId = model.replace(/^models\//, "");
    return `${BASE_URL}/${modelId}:generateContent?key=${encodeURIComponent(apiKey)}`;
  },
  buildHeaders: () => ({ "Content-Type": "application/json" }),
  // Request both modalities; images come back as inlineData parts.
  buildBody: (_model, body) => ({
    contents: [{ parts: [{ text: body.prompt }] }],
    generationConfig: { responseModalities: ["TEXT", "IMAGE"] },
  }),
  // Collect inlineData parts into OpenAI b64_json entries; when none were
  // returned, emit a single empty entry carrying the original prompt.
  normalize: (responseBody, prompt) => {
    const parts = responseBody.candidates?.[0]?.content?.parts || [];
    const images = parts.filter((p) => p.inlineData?.data).map((p) => ({ b64_json: p.inlineData.data }));
    return {
      created: nowSec(),
      data: images.length > 0 ? images : [{ b64_json: "", revised_prompt: prompt }],
    };
  },
};
|
||||
22
open-sse/handlers/imageProviders/huggingface.js
Normal file
22
open-sse/handlers/imageProviders/huggingface.js
Normal file
@@ -0,0 +1,22 @@
|
||||
// HuggingFace Inference API adapter — the endpoint responds with raw image
// bytes rather than JSON, so parseResponse converts them to b64_json.
import { nowSec } from "./_base.js";

const BASE_URL = "https://api-inference.huggingface.co/models";

const huggingfaceAdapter = {
  // The model id is appended directly to the inference endpoint path.
  buildUrl(model) {
    return `${BASE_URL}/${model}`;
  },
  buildHeaders(creds) {
    const requestHeaders = { "Content-Type": "application/json" };
    const token = creds?.apiKey || creds?.accessToken;
    if (token) {
      requestHeaders["Authorization"] = `Bearer ${token}`;
    }
    return requestHeaders;
  },
  // HF text-to-image takes the prompt under "inputs".
  buildBody(_model, body) {
    return { inputs: body.prompt };
  },
  // Convert the binary body straight into an OpenAI-shaped result.
  async parseResponse(response) {
    const rawBytes = await response.arrayBuffer();
    return {
      created: nowSec(),
      data: [{ b64_json: Buffer.from(rawBytes).toString("base64") }],
    };
  },
  // parseResponse already produced the OpenAI shape; nothing more to do.
  normalize(responseBody) {
    return responseBody;
  },
};

export default huggingfaceAdapter;
|
||||
37
open-sse/handlers/imageProviders/index.js
Normal file
37
open-sse/handlers/imageProviders/index.js
Normal file
@@ -0,0 +1,37 @@
|
||||
// Image provider adapter registry
|
||||
import createOpenAIAdapter from "./openai.js";
|
||||
import gemini from "./gemini.js";
|
||||
import codex from "./codex.js";
|
||||
import sdwebui from "./sdwebui.js";
|
||||
import comfyui from "./comfyui.js";
|
||||
import huggingface from "./huggingface.js";
|
||||
import nanobanana from "./nanobanana.js";
|
||||
import falAi from "./falAi.js";
|
||||
import stabilityAi from "./stabilityAi.js";
|
||||
import blackForestLabs from "./blackForestLabs.js";
|
||||
import runwayml from "./runwayml.js";
|
||||
|
||||
// Provider id → adapter. OpenAI-compatible providers share one factory;
// every other provider ships its own module. Keys are expected to match
// the provider ids used elsewhere (e.g. PROVIDER_MODELS) — verify when
// adding a new provider.
const ADAPTERS = {
  openai: createOpenAIAdapter("openai"),
  minimax: createOpenAIAdapter("minimax"),
  openrouter: createOpenAIAdapter("openrouter"),
  recraft: createOpenAIAdapter("recraft"),
  gemini,
  codex,
  sdwebui,
  comfyui,
  huggingface,
  nanobanana,
  "fal-ai": falAi,
  "stability-ai": stabilityAi,
  "black-forest-labs": blackForestLabs,
  runwayml,
};
|
||||
|
||||
/**
 * Look up the image adapter registered for a provider id.
 * Uses Object.hasOwn so inherited Object.prototype members (e.g.
 * provider === "toString") are never returned as fake adapters by the
 * bracket lookup; such lookups now yield null as callers expect.
 *
 * @param {string} provider - Provider id (key of ADAPTERS)
 * @returns {object|null} The adapter, or null when the provider is unsupported
 */
export function getImageAdapter(provider) {
  return Object.hasOwn(ADAPTERS, provider) ? ADAPTERS[provider] : null;
}
|
||||
|
||||
/**
 * Whether a provider id has a registered image adapter.
 * The previous `provider in ADAPTERS` check also reported inherited
 * Object.prototype keys ("toString", "valueOf", ...) as supported
 * providers; Object.hasOwn restricts the check to own keys.
 *
 * @param {string} provider - Provider id
 * @returns {boolean} True when an adapter is registered for `provider`
 */
export function isImageProvider(provider) {
  return Object.hasOwn(ADAPTERS, provider);
}
|
||||
58
open-sse/handlers/imageProviders/nanobanana.js
Normal file
58
open-sse/handlers/imageProviders/nanobanana.js
Normal file
@@ -0,0 +1,58 @@
|
||||
// NanoBanana API — async submit + poll record-info
import { sleep, nowSec, sizeToAspectRatio, POLL_INTERVAL_MS, POLL_TIMEOUT_MS } from "./_base.js";

const SUBMIT_URL = "https://api.nanobananaapi.ai/api/v1/nanobanana/generate";
const POLL_BASE = "https://api.nanobananaapi.ai/api/v1/nanobanana/record-info";

export default {
  // Submit-then-poll provider.
  async: true,
  buildUrl: () => SUBMIT_URL,
  buildHeaders: (creds) => {
    const headers = { "Content-Type": "application/json" };
    const key = creds?.apiKey || creds?.accessToken;
    if (key) headers["Authorization"] = `Bearer ${key}`;
    return headers;
  },
  buildBody: (_model, body) => {
    const ratio = sizeToAspectRatio(body.size);
    // Edit mode whenever any reference image is supplied.
    const isEdit = !!(body.image || (Array.isArray(body.images) && body.images.length));
    const req = {
      prompt: body.prompt,
      // "TEXTTOIAMGE"/"IMAGETOIAMGE" are the API's literal (misspelled) enum values.
      type: isEdit ? "IMAGETOIAMGE" : "TEXTTOIAMGE",
      numImages: body.n || 1,
      image_size: ratio,
      // API requires callBackUrl; we poll instead so a dummy URL is fine.
      callBackUrl: "https://localhost/callback",
    };
    if (isEdit) {
      // Merge the images array and the single image into one URL list.
      const urls = Array.isArray(body.images) ? body.images.filter(Boolean) : [];
      if (body.image) urls.push(body.image);
      req.imageUrls = urls;
    }
    return req;
  },
  // Async: parse submit → poll until SUCCESS, return raw poll data
  async parseResponse(response, { headers }) {
    const submitData = await response.json();
    // The API signals errors via a JSON "code" field, not the HTTP status.
    if (submitData.code !== 200) throw new Error(submitData.msg || "NanoBanana submit failed");
    const taskId = submitData.data?.taskId;
    if (!taskId) throw new Error("NanoBanana: no taskId returned");
    const pollUrl = `${POLL_BASE}?taskId=${encodeURIComponent(taskId)}`;
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    while (Date.now() < deadline) {
      await sleep(POLL_INTERVAL_MS);
      const r = await fetch(pollUrl, { headers });
      if (!r.ok) throw new Error(`NanoBanana status ${r.status}`);
      const s = await r.json();
      // successFlag: 1 = done, 2/3 = failure states, anything else = still running.
      const flag = s.data?.successFlag;
      if (flag === 1) return s.data;
      if (flag === 2 || flag === 3) throw new Error(s.data?.errorMessage || "NanoBanana generation failed");
    }
    throw new Error("NanoBanana polling timeout");
  },
  // The finished record carries hosted result URLs; map one to OpenAI shape.
  normalize: (responseBody, prompt) => {
    const url = responseBody.response?.resultImageUrl || responseBody.response?.originImageUrl;
    if (url) return { created: nowSec(), data: [{ url, revised_prompt: prompt }] };
    return { created: nowSec(), data: [] };
  },
};
|
||||
33
open-sse/handlers/imageProviders/openai.js
Normal file
33
open-sse/handlers/imageProviders/openai.js
Normal file
@@ -0,0 +1,33 @@
|
||||
// Factory for OpenAI-compatible /images/generations adapters
// (shared by openai, minimax, openrouter, recraft).

const ENDPOINTS = {
  openai: "https://api.openai.com/v1/images/generations",
  minimax: "https://api.minimaxi.com/v1/images/generations",
  openrouter: "https://openrouter.ai/api/v1/images/generations",
  recraft: "https://external.api.recraft.ai/v1/images/generations",
};

/**
 * Create an adapter for one of the OpenAI-compatible providers.
 * @param {string} providerId - Key of ENDPOINTS ("openai", "minimax", ...)
 * @returns {object} Adapter with buildUrl/buildHeaders/buildBody/normalize
 */
export default function createOpenAIAdapter(providerId) {
  // Bearer auth for all providers; OpenRouter also wants attribution headers.
  const buildHeaders = (creds) => {
    const requestHeaders = { "Content-Type": "application/json" };
    const apiKey = creds?.apiKey || creds?.accessToken;
    if (apiKey) requestHeaders["Authorization"] = `Bearer ${apiKey}`;
    if (providerId === "openrouter") {
      requestHeaders["HTTP-Referer"] = "https://endpoint-proxy.local";
      requestHeaders["X-Title"] = "Endpoint Proxy";
    }
    return requestHeaders;
  };

  // Standard OpenAI payload; optional fields only when the caller set them.
  const buildBody = (model, body) => {
    const { prompt, n = 1, size = "1024x1024", quality, style, response_format } = body;
    const payload = { model, prompt, n, size };
    if (quality) payload.quality = quality;
    if (style) payload.style = style;
    if (response_format) payload.response_format = response_format;
    return payload;
  };

  return {
    buildUrl: () => ENDPOINTS[providerId],
    buildHeaders,
    buildBody,
    // Responses are already OpenAI-shaped.
    normalize: (responseBody) => responseBody,
  };
}
|
||||
47
open-sse/handlers/imageProviders/runwayml.js
Normal file
47
open-sse/handlers/imageProviders/runwayml.js
Normal file
@@ -0,0 +1,47 @@
|
||||
// Runway ML — async submit + /tasks/{id} polling
|
||||
import { sleep, nowSec, sizeToAspectRatio, POLL_INTERVAL_MS, POLL_TIMEOUT_MS } from "./_base.js";
|
||||
|
||||
const BASE_URL = "https://api.dev.runwayml.com/v1";

// Runway ML adapter: submission is asynchronous — POST creates a task,
// then /tasks/{id} is polled until it settles.
export default {
  async: true,
  buildUrl: (model) => {
    // gen4_image* ids contain "image" → text_to_image; everything else is video.
    const endpoint = model.includes("image") ? "text_to_image" : "image_to_video";
    return `${BASE_URL}/${endpoint}`;
  },
  buildHeaders: (creds) => ({
    "Content-Type": "application/json",
    "Authorization": `Bearer ${creds?.apiKey || creds?.accessToken}`,
    "X-Runway-Version": "2024-11-06",
  }),
  buildBody: (model, body) => {
    const req = { promptText: body.prompt, model, ratio: sizeToAspectRatio(body.size) };
    if (model.includes("image")) {
      // Image generation: optional reference image list.
      if (body.image) req.referenceImages = [{ uri: body.image }];
    } else {
      // Video generation: fixed 5-second clip, optional source frame.
      req.duration = 5;
      if (body.image) req.promptImage = body.image;
    }
    return req;
  },
  // Submit returned a task id; poll until SUCCEEDED/FAILED/CANCELLED or timeout.
  async parseResponse(response, { headers }) {
    const { id } = await response.json();
    if (!id) throw new Error("Runway: no task id returned");
    const taskUrl = `${BASE_URL}/tasks/${id}`;
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    while (Date.now() < deadline) {
      await sleep(POLL_INTERVAL_MS);
      const poll = await fetch(taskUrl, { headers });
      if (!poll.ok) throw new Error(`Runway status ${poll.status}`);
      const task = await poll.json();
      if (task.status === "SUCCEEDED") return task;
      if (task.status === "FAILED" || task.status === "CANCELLED") {
        throw new Error(task.failure || "Runway task failed");
      }
    }
    throw new Error("Runway polling timeout");
  },
  normalize: (responseBody) => {
    // Completed tasks carry result URLs in `output`.
    const outputs = Array.isArray(responseBody.output) ? responseBody.output : [];
    return { created: nowSec(), data: outputs.map((url) => ({ url })) };
  },
};
|
||||
17
open-sse/handlers/imageProviders/sdwebui.js
Normal file
17
open-sse/handlers/imageProviders/sdwebui.js
Normal file
@@ -0,0 +1,17 @@
|
||||
// SD WebUI (AUTOMATIC1111) — local, noAuth
|
||||
import { nowSec } from "./_base.js";
|
||||
|
||||
// Parse "WxH" into numeric dimensions; NaN/0 fall back to 512.
const toDims = (size) => {
  const [w, h] = size.split("x").map(Number);
  return { width: w || 512, height: h || 512 };
};

// SD WebUI (AUTOMATIC1111) adapter — talks to a local server, no credentials.
export default {
  noAuth: true,
  buildUrl: () => "http://localhost:7860/sdapi/v1/txt2img",
  buildHeaders: () => ({ "Content-Type": "application/json" }),
  buildBody(_model, body) {
    const { prompt, n = 1, size = "1024x1024" } = body;
    return { prompt, ...toDims(size), steps: 20, batch_size: n };
  },
  normalize(responseBody) {
    // A1111 returns base64 images in `images`.
    const raw = responseBody.images;
    const data = Array.isArray(raw) ? raw.map((b64) => ({ b64_json: b64 })) : [];
    return { created: nowSec(), data };
  },
};
|
||||
34
open-sse/handlers/imageProviders/stabilityAi.js
Normal file
34
open-sse/handlers/imageProviders/stabilityAi.js
Normal file
@@ -0,0 +1,34 @@
|
||||
// Stability AI v2 — sync, returns { image: "<b64>" }
|
||||
import { nowSec, sizeToAspectRatio } from "./_base.js";
|
||||
|
||||
const BASE_URL = "https://api.stability.ai/v2beta/stable-image/generate";

/**
 * Pick the endpoint segment for a Stability model id:
 * ids containing "ultra" → /ultra, ids containing "sd3" → /sd3, else /core.
 */
function modelToEndpoint(model) {
  if (model.includes("ultra")) return "ultra";
  return model.includes("sd3") ? "sd3" : "core";
}

// Stability AI v2 adapter — synchronous; JSON response carries { image: "<b64>" }.
export default {
  buildUrl: (model) => `${BASE_URL}/${modelToEndpoint(model)}`,
  buildHeaders: (creds) => ({
    "Content-Type": "application/json",
    "Authorization": `Bearer ${creds?.apiKey || creds?.accessToken}`,
    // Ask for JSON (base64 image) rather than raw bytes.
    "Accept": "application/json",
  }),
  buildBody: (model, body) => {
    const req = {
      prompt: body.prompt,
      output_format: (body.output_format || "png").toLowerCase(),
    };
    if (body.size) req.aspect_ratio = sizeToAspectRatio(body.size);
    if (body.style) req.style_preset = body.style;
    // Only the sd3 endpoint takes an explicit model selector.
    if (model.includes("sd3")) req.model = model;
    return req;
  },
  normalize: (responseBody) => {
    const b64 = responseBody.image;
    return { created: nowSec(), data: b64 ? [{ b64_json: b64 }] : [] };
  },
};
|
||||
@@ -1,12 +1,15 @@
|
||||
import { Buffer } from "node:buffer";
|
||||
import { createErrorResult } from "../utils/error.js";
|
||||
import { HTTP_STATUS } from "../config/runtimeConfig.js";
|
||||
import { execFile } from "child_process";
|
||||
import { promisify } from "util";
|
||||
import { mkdtemp, readFile, rm } from "fs/promises";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { getTtsAdapter, synthesizeViaConfig } from "./ttsProviders/index.js";
|
||||
|
||||
const execFileAsync = promisify(execFile); // promise-based child_process.execFile (used by local-device TTS)
|
||||
// Re-export voice fetchers + voices APIs for backward compat with existing routes
|
||||
export {
|
||||
VOICE_FETCHERS,
|
||||
fetchEdgeTtsVoices,
|
||||
fetchLocalDeviceVoices,
|
||||
fetchElevenLabsVoices,
|
||||
} from "./ttsProviders/index.js";
|
||||
|
||||
// ── Response Formatter (DRY) ───────────────────────────────────
|
||||
function createTtsResponse(base64Audio, format, responseFormat) {
|
||||
@@ -25,7 +28,7 @@ function createTtsResponse(base64Audio, format, responseFormat) {
|
||||
};
|
||||
}
|
||||
|
||||
// Binary format (default): return raw MP3
|
||||
// Binary format (default): return raw audio
|
||||
return {
|
||||
success: true,
|
||||
response: new Response(audioBuffer, {
|
||||
@@ -38,691 +41,11 @@ function createTtsResponse(base64Audio, format, responseFormat) {
|
||||
};
|
||||
}
|
||||
|
||||
// ── Token cache per engine ─────────────────────────────────────
// Scraped session tokens for the unofficial Google/Bing translate TTS
// endpoints, refreshed on a timer rather than on upstream expiry.
const cache = {
  google: { token: null, tokenTime: 0 },
  bing: { token: null, tokenTime: 0 },
};

const GOOGLE_REFRESH = 11 * 60 * 1000; // re-scrape the Google session after 11 min
const BING_REFRESH = 5 * 60 * 1000; // conservative: token TTL is 1h but refresh early

// Browser-like fingerprint so the scraping endpoints serve the regular HTML.
const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36";
// Client-hint headers matching the UA above.
// NOTE(review): not referenced anywhere in the code visible here — confirm still used.
const SEC_CH_HEADERS = {
  "sec-ch-ua": '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"',
  "sec-ch-ua-arch": '"arm"',
  "sec-ch-ua-bitness": '"64"',
  "sec-ch-ua-full-version": '"146.0.7680.178"',
  "sec-ch-ua-full-version-list": '"Chromium";v="146.0.7680.178", "Not-A.Brand";v="24.0.0.0", "Google Chrome";v="146.0.7680.178"',
  "sec-ch-ua-mobile": "?0",
  "sec-ch-ua-model": '""',
  "sec-ch-ua-platform": '"macOS"',
  "sec-ch-ua-platform-version": '"15.1.0"',
};
|
||||
|
||||
// ── Google TTS ─────────────────────────────────────────────────
// Scrape translate.google.com for the session values ("f.sid", "bl") that the
// batchexecute RPC endpoint requires. Result is cached for GOOGLE_REFRESH ms.
async function getGoogleToken() {
  const now = Date.now();
  if (cache.google.token && now - cache.google.tokenTime < GOOGLE_REFRESH) {
    return cache.google.token;
  }
  const res = await fetch("https://translate.google.com/", {
    headers: { "User-Agent": UA },
  });
  if (!res.ok) throw new Error(`Google translate fetch failed: ${res.status}`);
  const html = await res.text();
  // Session id and build label are embedded in the page's inline JS config.
  const fSid = html.match(/"FdrFJe":"(.*?)"/)?.[ 1];
  const bl = html.match(/"cfb2h":"(.*?)"/)?.[ 1];
  if (!fSid || !bl) throw new Error("Failed to parse Google token");
  cache.google.token = { "f.sid": fSid, bl };
  cache.google.tokenTime = now;
  return cache.google.token;
}
|
||||
|
||||
let _googleIdx = 0; // request counter; feeds the _reqid sequence below

// Synthesize `text` in language `lang` via Google Translate's internal
// batchexecute RPC (method id "jQ1olc"). Unofficial endpoint.
// Returns base64-encoded audio (MP3).
async function googleTts(text, lang) {
  const token = await getGoogleToken();
  // Strip characters the voice reads aloud badly; turn ", " into ". " for pacing.
  const cleanText = text.replace(/[@^*()\\/\-_+=><"'\u201c\u201d\u3010\u3011]/g, " ").replaceAll(", ", ". ");
  const rpcId = "jQ1olc";
  // Mimic the web client's request-id scheme: counter*100000 + random 4 digits.
  const reqId = (++_googleIdx * 100000) + Math.floor(1000 + Math.random() * 9000);
  const query = new URLSearchParams({
    rpcids: rpcId,
    "f.sid": token["f.sid"],
    bl: token.bl,
    hl: lang,
    "soc-app": 1, "soc-platform": 1, "soc-device": 1,
    _reqid: reqId,
    rt: "c",
  });
  // Inner payload shape observed on the wire: [text, lang, null, "undefined", [0]].
  const payload = [cleanText, lang, null, "undefined", [0]];
  const body = new URLSearchParams();
  body.append("f.req", JSON.stringify([[[rpcId, JSON.stringify(payload), null, "generic"]]]));
  const res = await fetch(`https://translate.google.com/_/TranslateWebserverUi/data/batchexecute?${query}`, {
    method: "POST",
    headers: { "Content-Type": "application/x-www-form-urlencoded", "Referer": "https://translate.google.com/" },
    body: body.toString(),
  });
  if (!res.ok) throw new Error(`Google TTS failed: ${res.status}`);
  const data = await res.text();
  // Response is a chunked envelope; the JSON payload sits on line index 3,
  // with the base64 audio nested at [0][2] → [0].
  const split = JSON.parse(data.split("\n")[3]);
  const base64 = JSON.parse(split[0][2])[0];
  // Tiny payloads are error stubs, not audio.
  if (!base64 || base64.length < 100) throw new Error("Google TTS returned empty audio");
  return base64; // base64 MP3
}
|
||||
|
||||
// ── Bing TTS ───────────────────────────────────────────────────
// Scrape bing.com/translator for the abuse-prevention key/token pair plus the
// session cookies the TTS endpoint expects. Cached for BING_REFRESH ms.
async function getBingToken() {
  const now = Date.now();
  if (cache.bing.token && now - cache.bing.tokenTime < BING_REFRESH) {
    return cache.bing.token;
  }
  const res = await fetch("https://www.bing.com/translator", {
    headers: { "User-Agent": UA, "Accept-Language": "vi,en-US;q=0.9,en;q=0.8" },
  });
  if (!res.ok) throw new Error(`Bing translator fetch failed: ${res.status}`);
  // Keep only the "name=value" part of each Set-Cookie for replay.
  const rawCookies = res.headers.getSetCookie?.() || [];
  const cookie = rawCookies.map((c) => c.split(";")[0]).join("; ");
  const html = await res.text();
  // Inline script contains: params_AbusePreventionHelper = [key, "token", ...]
  const match = html.match(/params_AbusePreventionHelper\s*=\s*\[([^,]+),([^,]+),/);
  if (!match) throw new Error("Failed to parse Bing token");
  cache.bing.token = { key: match[1], token: match[2].replace(/"/g, ""), cookie };
  cache.bing.tokenTime = now;
  return cache.bing.token;
}
|
||||
|
||||
// POST one SSML synthesis request to Bing translator's TTS endpoint.
// Returns the raw fetch Response; the caller checks status and reads audio.
async function bingTtsRequest(text, voiceId, token) {
  // "vi-VN-HoaiMyNeural" → xml:lang "vi-VN"
  const xmlLang = voiceId.split("-").slice(0, 2).join("-");
  const gender = voiceId.toLowerCase().includes("male") ? "Male" : "Female";
  const ssml = `<speak version='1.0' xml:lang='${xmlLang}'><voice xml:lang='${xmlLang}' xml:gender='${gender}' name='${voiceId}'><prosody rate='0.00%'>${text}</prosody></voice></speak>`;
  const form = new URLSearchParams({ ssml, token: token.token, key: token.key });
  const headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "*/*",
    "Origin": "https://www.bing.com",
    "Referer": "https://www.bing.com/translator",
    "User-Agent": UA,
  };
  // Replay session cookies captured while scraping the token page.
  if (token.cookie) headers["Cookie"] = token.cookie;
  return fetch("https://www.bing.com/tfettts?isVertical=1&&IG=1&IID=translator.5023&SFX=1", {
    method: "POST",
    body: form.toString(),
    headers,
  });
}
|
||||
|
||||
// Synthesize via Bing TTS with a one-shot token refresh on 429/403.
// Returns base64 MP3.
async function bingTts(text, voiceId) {
  let res = await bingTtsRequest(text, voiceId, await getBingToken());

  // Rate-limited or rejected: drop the cached token and retry exactly once.
  if (res.status === 429 || res.status === 403) {
    cache.bing.token = null;
    cache.bing.tokenTime = 0;
    res = await bingTtsRequest(text, voiceId, await getBingToken());
  }

  if (!res.ok) {
    const detail = await res.text().catch(() => "");
    throw new Error(`Bing TTS failed: ${res.status}${detail ? " - " + detail : ""}`);
  }
  const audio = await res.arrayBuffer();
  // Sub-1KB bodies are error stubs, not audio.
  if (audio.byteLength < 1024) throw new Error("Bing TTS returned empty audio");
  return Buffer.from(audio).toString("base64"); // base64 MP3
}
|
||||
|
||||
// ── Local Device TTS (macOS `say` + Windows SAPI + ffmpeg) ──────
let _localVoicesCache = null; // installed voices are static per machine

// List macOS voices by parsing `say -v ?` output ("Name  locale # sample").
async function fetchLocalDeviceVoicesMac() {
  const { stdout } = await execFileAsync("say", ["-v", "?"]);
  const lineRe = /^([^\s].*?)\s{2,}([a-z]{2}_[A-Z]{2})/;
  return stdout.split("\n").reduce((voices, line) => {
    const m = line.match(lineRe);
    if (m) {
      const name = m[1].trim();
      const locale = m[2].trim(); // e.g. "en_US"
      const [lang, country] = locale.split("_");
      voices.push({ id: name, name, locale, lang, country, gender: "" });
    }
    return voices;
  }, []);
}
|
||||
|
||||
// Enumerate installed Windows SAPI voices via PowerShell (System.Speech),
// emitted as compact JSON and normalized to the same shape as the macOS list.
async function fetchLocalDeviceVoicesWin() {
  // Use -WindowStyle Hidden to suppress PowerShell popup window
  const script = [
    "Add-Type -AssemblyName System.Speech;",
    "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
    "$s.GetInstalledVoices() | ForEach-Object { $v = $_.VoiceInfo;",
    "[PSCustomObject]@{ Name=$v.Name; Culture=$v.Culture.Name; Gender=$v.Gender } }",
    "| ConvertTo-Json -Compress",
  ].join(" ");
  const { stdout } = await execFileAsync(
    "powershell.exe",
    ["-NoProfile", "-NonInteractive", "-WindowStyle", "Hidden", "-Command", script],
    { windowsHide: true }
  );
  const raw = JSON.parse(stdout.trim() || "[]");
  // Normalize: single object → array (ConvertTo-Json unwraps 1-element lists)
  const list = Array.isArray(raw) ? raw : [raw];
  return list.map((v) => {
    const culture = v.Culture || "en-US";
    const [lang, country = ""] = culture.split("-");
    // Gender: 0=NotSet, 1=Male, 2=Female (SAPI enum)
    const genderMap = { 1: "Male", 2: "Female", Male: "Male", Female: "Female" };
    return {
      id: v.Name,
      name: v.Name,
      locale: culture.replace("-", "_"), // match the macOS "en_US" shape
      lang,
      country,
      gender: genderMap[v.Gender] || "",
    };
  });
}
|
||||
|
||||
// Platform-appropriate local voice list; any failure degrades to [].
export async function fetchLocalDeviceVoices() {
  if (_localVoicesCache) return _localVoicesCache;
  try {
    _localVoicesCache = process.platform === "win32"
      ? await fetchLocalDeviceVoicesWin()
      : await fetchLocalDeviceVoicesMac();
    return _localVoicesCache;
  } catch {
    // `say`/powershell unavailable (e.g. Linux) — report no local voices.
    return [];
  }
}
|
||||
|
||||
// Synthesize with the macOS `say` command, then transcode AIFF → MP3 with
// ffmpeg inside a throwaway temp dir. Returns base64 MP3.
// NOTE(review): only the macOS path is implemented here — voices listed by
// fetchLocalDeviceVoicesWin() cannot be synthesized by this function; confirm
// whether a Windows synthesis path is handled elsewhere.
async function localDeviceTts(text, voiceId) {
  const dir = await mkdtemp(join(tmpdir(), "tts-"));
  const aiffPath = join(dir, "out.aiff");
  const mp3Path = join(dir, "out.mp3");
  try {
    // Without a voiceId, `say` uses the system default voice.
    const args = voiceId ? ["-v", voiceId, "-o", aiffPath, text] : ["-o", aiffPath, text];
    await execFileAsync("say", args);
    // VBR quality 4; -y overwrites without prompting.
    await execFileAsync("ffmpeg", ["-y", "-i", aiffPath, "-codec:a", "libmp3lame", "-qscale:a", "4", mp3Path]);
    const buf = await readFile(mp3Path);
    return buf.toString("base64");
  } finally {
    // Always clean up the temp dir, even when synthesis fails.
    await rm(dir, { recursive: true, force: true });
  }
}
|
||||
|
||||
// ── Voices list (Edge TTS public endpoint) ─────────────────────
let _voicesCache = null;
let _voicesCacheTime = 0;
const VOICES_TTL = 24 * 60 * 60 * 1000; // voice catalogues change rarely — cache a day

// Fetch the Edge read-aloud voice catalogue (public endpoint, token in query).
export async function fetchEdgeTtsVoices() {
  const now = Date.now();
  if (_voicesCache && now - _voicesCacheTime < VOICES_TTL) {
    return _voicesCache;
  }
  const url =
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
  const res = await fetch(url, { headers: { "User-Agent": UA } });
  if (!res.ok) throw new Error(`Edge TTS voices fetch failed: ${res.status}`);
  _voicesCache = await res.json();
  _voicesCacheTime = now;
  return _voicesCache;
}
|
||||
|
||||
// ── ElevenLabs TTS ─────────────────────────────────────────────
const _elevenlabsVoicesCache = new Map(); // apiKey → { voices, time }

// Fetch (and cache per API key) the ElevenLabs voice list, with a top-level
// `lang` added from labels.language for grouping.
export async function fetchElevenLabsVoices(apiKey) {
  if (!apiKey) throw new Error("ElevenLabs API key required");

  const now = Date.now();
  const hit = _elevenlabsVoicesCache.get(apiKey);
  if (hit && now - hit.time < VOICES_TTL) return hit.voices;

  const res = await fetch("https://api.elevenlabs.io/v1/voices", {
    headers: {
      "xi-api-key": apiKey,
      "Content-Type": "application/json",
    },
  });
  if (!res.ok) throw new Error(`ElevenLabs voices fetch failed: ${res.status}`);

  const data = await res.json();
  const voices = (data.voices || []).map((v) => ({
    ...v,
    lang: v.labels?.language || "en", // default grouping bucket
  }));
  _elevenlabsVoicesCache.set(apiKey, { voices, time: now });
  return voices;
}
|
||||
|
||||
// Synthesize `text` with ElevenLabs; returns base64 MP3.
async function elevenlabsTts(text, voiceId, apiKey, modelId = "eleven_flash_v2_5") {
  const payload = {
    text,
    model_id: modelId,
    voice_settings: {
      stability: 0.5,
      similarity_boost: 0.75,
    },
  };
  const res = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, {
    method: "POST",
    headers: { "xi-api-key": apiKey, "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (!res.ok) {
    const err = await res.json().catch(() => ({}));
    throw new Error(err?.detail?.message || `ElevenLabs TTS failed: ${res.status}`);
  }

  const audio = await res.arrayBuffer();
  // Sub-1KB bodies are error stubs, not audio.
  if (audio.byteLength < 1024) throw new Error("ElevenLabs TTS returned empty audio");
  return Buffer.from(audio).toString("base64");
}
|
||||
|
||||
// ── Voice Fetcher Registry (DRY) ───────────────────────────────
// Maps provider id → async voice-list fetcher, consumed by the voices routes.
export const VOICE_FETCHERS = {
  "edge-tts": fetchEdgeTtsVoices,
  "local-device": fetchLocalDeviceVoices,
  "elevenlabs": fetchElevenLabsVoices, // requires an apiKey argument
  // google-tts: uses hardcoded language codes
  // openai: uses hardcoded voices from providerModels.js
};
|
||||
|
||||
// ── OpenRouter TTS (via chat completions + audio modality) ───────────────────
// Speech through OpenRouter's chat-completions endpoint with the audio output
// modality; base64 WAV chunks arrive over an SSE stream and are concatenated.
async function handleOpenRouterTts({ model, input, credentials, responseFormat = "mp3" }) {
  if (!credentials?.apiKey) {
    return createErrorResult(HTTP_STATUS.UNAUTHORIZED, "No OpenRouter API key configured");
  }

  // model format: "tts-model/voice" e.g. "openai/gpt-4o-mini-tts/alloy"
  let ttsModel = "openai/gpt-4o-mini-tts";
  let voice = "alloy";
  if (model && model.includes("/")) {
    const lastSlash = model.lastIndexOf("/");
    const maybVoice = model.slice(lastSlash + 1);
    const maybeModel = model.slice(0, lastSlash);
    // voice names are simple lowercase words, model names contain "/"
    if (maybeModel.includes("/")) {
      ttsModel = maybeModel;
      voice = maybVoice;
    } else {
      // NOTE(review): single-slash input keeps the default model and uses the
      // WHOLE "a/b" string as the voice — looks suspicious; confirm intended.
      voice = model;
    }
  } else if (model) {
    voice = model;
  }

  const res = await fetch("https://openrouter.ai/api/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${credentials.apiKey}`,
      // OpenRouter attribution headers.
      "HTTP-Referer": "https://endpoint-proxy.local",
      "X-Title": "Endpoint Proxy",
    },
    body: JSON.stringify({
      model: ttsModel,
      modalities: ["text", "audio"],
      audio: { voice, format: "wav" },
      stream: true, // audio deltas are only delivered when streaming
      messages: [{ role: "user", content: input }],
    }),
  });

  if (!res.ok) {
    const err = await res.json().catch(() => ({}));
    return createErrorResult(res.status, err?.error?.message || `OpenRouter TTS failed: ${res.status}`);
  }

  // Parse SSE stream, accumulate base64 audio chunks
  const chunks = [];
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });

    // Keep the trailing partial line in `buffer` for the next read.
    const lines = buffer.split("\n");
    buffer = lines.pop();

    for (const line of lines) {
      if (!line.startsWith("data: ") || line === "data: [DONE]") continue;
      try {
        const json = JSON.parse(line.slice(6));
        const audioData = json.choices?.[0]?.delta?.audio?.data;
        if (audioData) chunks.push(audioData);
      } catch {} // tolerate malformed/partial SSE payload lines
    }
  }

  if (chunks.length === 0) {
    return createErrorResult(HTTP_STATUS.BAD_GATEWAY, "OpenRouter TTS returned no audio data");
  }

  const base64Audio = chunks.join("");
  return createTtsResponse(base64Audio, "wav", responseFormat);
}
|
||||
|
||||
// ── OpenAI TTS ───────────────────────────────────────────────────────────────
// `model` is "tts-model/voice" (e.g. "tts-1/alloy", "gpt-4o-mini-tts/nova")
// or a bare voice name. Returns a TTS result via createTtsResponse.
async function handleOpenAiTts({ model, input, credentials, responseFormat = "mp3" }) {
  if (!credentials?.apiKey) {
    return createErrorResult(HTTP_STATUS.UNAUTHORIZED, "No OpenAI API key configured");
  }

  let ttsModel = "gpt-4o-mini-tts";
  let voice = "alloy";
  if (model) {
    const parts = model.split("/");
    if (parts.length === 2) {
      [ttsModel, voice] = parts;
    } else if (!model.includes("/")) {
      // Bare voice name, keep the default model.
      voice = model;
    }
  }

  // Allow OpenAI-compatible gateways via credentials.baseUrl.
  const baseUrl = (credentials.baseUrl || "https://api.openai.com").replace(/\/+$/, "");
  const res = await fetch(`${baseUrl}/v1/audio/speech`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${credentials.apiKey}`,
    },
    body: JSON.stringify({ model: ttsModel, voice, input }),
  });

  if (!res.ok) {
    const err = await res.json().catch(() => ({}));
    return createErrorResult(res.status, err?.error?.message || `OpenAI TTS failed: ${res.status}`);
  }

  const base64 = Buffer.from(await res.arrayBuffer()).toString("base64");
  return createTtsResponse(base64, "mp3", responseFormat);
}
|
||||
|
||||
// ── Generic TTS Format Handlers (config-driven via ttsConfig.format) ──────
// Parse `model` as "modelId/voiceId" or bare "modelId". Model ids may
// themselves contain slashes, so known ids are matched first (longest wins),
// then we fall back to splitting on the last "/".
function parseModelVoice(model, defaultModel = "", defaultVoice = "", knownModels = []) {
  if (!model) return { modelId: defaultModel, voiceId: defaultVoice };

  const ids = knownModels
    .map((m) => m.id || m)
    .filter(Boolean)
    .sort((a, b) => b.length - a.length); // longest prefix first
  for (const id of ids) {
    if (model === id) return { modelId: id, voiceId: defaultVoice };
    if (model.startsWith(`${id}/`)) {
      return { modelId: id, voiceId: model.slice(id.length + 1) };
    }
  }

  // Unknown model: "vendor/model/voice" → model "vendor/model", voice "voice".
  const cut = model.lastIndexOf("/");
  if (cut > 0) {
    return { modelId: model.slice(0, cut), voiceId: model.slice(cut + 1) };
  }
  return { modelId: defaultModel || model, voiceId: defaultVoice || model };
}
|
||||
|
||||
// Convert an upstream binary-audio Response into { base64, format }.
// Format is inferred from Content-Type, falling back to `defaultFormat`.
// Throws when the body is too small to plausibly be audio.
async function responseToBase64(res, defaultFormat = "mp3") {
  const buf = await res.arrayBuffer();
  if (buf.byteLength < 100) throw new Error("Upstream returned empty audio");
  const ctype = res.headers.get("content-type") || "";
  const format =
    ctype.includes("wav") ? "wav"
    : ctype.includes("mpeg") || ctype.includes("mp3") ? "mp3"
    : ctype.includes("ogg") ? "ogg"
    : defaultFormat;
  return { base64: Buffer.from(buf).toString("base64"), format };
}
|
||||
|
||||
// Read an error Response body and throw the most specific message found,
// preferring structured JSON fields, then raw text, then a status fallback.
async function throwUpstreamError(res) {
  const text = await res.text().catch(() => "");
  const fallback = `Upstream error (${res.status})`;
  let msg;
  try {
    const parsed = JSON.parse(text);
    msg =
      parsed?.error?.message ||
      parsed?.message ||
      parsed?.detail?.message ||
      (typeof parsed?.detail === "string" ? parsed.detail : null) ||
      text ||
      fallback;
  } catch {
    // Not JSON — surface the raw body when present.
    msg = text || fallback;
  }
  throw new Error(msg);
}
|
||||
|
||||
// Hyperbolic: POST { text } → JSON { audio: <base64 mp3> }
async function ttsHyperbolic({ baseUrl, apiKey, text }) {
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) await throwUpstreamError(res);
  const { audio } = await res.json();
  return { base64: audio, format: "mp3" };
}
|
||||
|
||||
// Deepgram: model selected via query param, "Token" auth scheme, binary reply.
async function ttsDeepgram({ baseUrl, apiKey, text, modelId }) {
  const url = new URL(baseUrl);
  url.searchParams.set("model", modelId || "aura-asteria-en");
  const res = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Token ${apiKey}` },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
||||
|
||||
// Nvidia NIM: POST { input: { text }, voice, model } → binary audio
async function ttsNvidia({ baseUrl, apiKey, text, modelId, voiceId }) {
  const payload = { input: { text }, voice: voiceId || "default", model: modelId };
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
    body: JSON.stringify(payload),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
||||
|
||||
// HuggingFace Inference API: POST {baseUrl}/{modelId} with { inputs } → binary
async function ttsHuggingFace({ baseUrl, apiKey, text, modelId }) {
  // Guard against path traversal in caller-supplied model ids.
  if (!modelId || modelId.includes("..")) throw new Error("Invalid HuggingFace model ID");
  const headers = { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` };
  const res = await fetch(`${baseUrl}/${modelId}`, {
    method: "POST",
    headers,
    body: JSON.stringify({ inputs: text }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
||||
|
||||
// Inworld: Basic auth; JSON reply { audioContent: <base64 mp3> }
async function ttsInworld({ baseUrl, apiKey, text, modelId, voiceId }) {
  const payload = {
    text,
    voiceId: voiceId || "Alex",
    modelId: modelId || "inworld-tts-1.5-mini",
    audioConfig: { audioEncoding: "MP3" },
  };
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Basic ${apiKey}` },
    body: JSON.stringify(payload),
  });
  if (!res.ok) await throwUpstreamError(res);
  const { audioContent } = await res.json();
  if (!audioContent) throw new Error("Inworld TTS returned no audio");
  return { base64: audioContent, format: "mp3" };
}
|
||||
|
||||
// Cartesia: POST { model_id, transcript, voice?, output_format } → binary
async function ttsCartesia({ baseUrl, apiKey, text, modelId, voiceId }) {
  const payload = {
    model_id: modelId || "sonic-2",
    transcript: text,
    ...(voiceId ? { voice: { mode: "id", id: voiceId } } : {}),
    output_format: { container: "mp3", bit_rate: 128000, sample_rate: 44100 },
  };
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "X-API-Key": apiKey,
      // Cartesia requires a pinned API version header.
      "Cartesia-Version": "2024-06-10",
    },
    body: JSON.stringify(payload),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
||||
|
||||
// PlayHT: credential is "userId:apiKey"; voices are s3 manifest URLs.
async function ttsPlayHt({ baseUrl, apiKey, text, modelId, voiceId }) {
  const [userId, key] = (apiKey || ":").split(":");
  const headers = {
    "Content-Type": "application/json",
    "Accept": "audio/mpeg",
    "X-USER-ID": userId || "",
    // Fall back to the raw credential when it wasn't "user:key" shaped.
    "Authorization": `Bearer ${key || apiKey}`,
  };
  const payload = {
    text,
    voice: voiceId || "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
    voice_engine: modelId || "PlayDialog",
    output_format: "mp3",
    speed: 1,
  };
  const res = await fetch(baseUrl, { method: "POST", headers, body: JSON.stringify(payload) });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
||||
|
||||
// Coqui (local server, no auth): POST { text, speaker_id? } → WAV
async function ttsCoqui({ baseUrl, text, voiceId }) {
  const payload = voiceId ? { text, speaker_id: voiceId } : { text };
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
||||
|
||||
// Tortoise (local server, no auth): POST { text, voice } → binary WAV
async function ttsTortoise({ baseUrl, text, voiceId }) {
  const body = JSON.stringify({ text, voice: voiceId || "random" });
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body,
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
||||
|
||||
// OpenAI-compatible speech endpoint (qwen3-tts, openai-compat):
// POST { model, input, voice, response_format, speed } → binary MP3.
async function ttsOpenAiCompat({ baseUrl, apiKey, text, modelId, voiceId }) {
  const payload = {
    model: modelId,
    input: text,
    voice: voiceId || "alloy",
    response_format: "mp3",
    speed: 1.0,
  };
  const headers = { "Content-Type": "application/json" };
  // Some compatible servers run without auth — header only when a key exists.
  if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
  const res = await fetch(baseUrl, { method: "POST", headers, body: JSON.stringify(payload) });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
||||
|
||||
// Format → handler dispatcher (DRY)
// Keys match AI_PROVIDERS[*].ttsConfig.format (consumed by synthesizeViaConfig).
const FORMAT_HANDLERS = {
  hyperbolic: ttsHyperbolic,
  deepgram: ttsDeepgram,
  "nvidia-tts": ttsNvidia,
  "huggingface-tts": ttsHuggingFace,
  inworld: ttsInworld,
  cartesia: ttsCartesia,
  playht: ttsPlayHt,
  coqui: ttsCoqui, // local server, no auth
  tortoise: ttsTortoise, // local server, no auth
  openai: ttsOpenAiCompat,
};
|
||||
|
||||
// ── TTS Provider Registry (legacy noAuth + special providers) ──────────
// Each entry: synthesize(text, model, credentials?, responseFormat?) returns
// either { base64, format } or (openai/openrouter) a fully-built result object.
const TTS_PROVIDERS = {
  "google-tts": {
    synthesize: async (text, model) => {
      // `model` carries the language code here, not a voice id.
      const base64 = await googleTts(text, model || "en");
      return { base64, format: "mp3" };
    },
    requiresCredentials: false,
  },
  "edge-tts": {
    synthesize: async (text, model) => {
      // `model` is an Edge/Bing neural voice id.
      const base64 = await bingTts(text, model || "vi-VN-HoaiMyNeural");
      return { base64, format: "mp3" };
    },
    requiresCredentials: false,
  },
  "local-device": {
    synthesize: async (text, model) => {
      // `model` is a local system voice name.
      const base64 = await localDeviceTts(text, model);
      return { base64, format: "mp3" };
    },
    requiresCredentials: false,
  },
  "elevenlabs": {
    synthesize: async (text, model, credentials) => {
      if (!credentials?.apiKey) throw new Error("ElevenLabs API key required");
      // `model` is "modelId/voiceId" or a bare voice id.
      let modelId = "eleven_flash_v2_5";
      let voiceId = model;
      if (model && model.includes("/")) [modelId, voiceId] = model.split("/");
      const base64 = await elevenlabsTts(text, voiceId, credentials.apiKey, modelId);
      return { base64, format: "mp3" };
    },
    requiresCredentials: true,
  },
  "openai": {
    // Delegates fully; returns a complete response result, not { base64 }.
    synthesize: async (text, model, credentials, responseFormat) => {
      return await handleOpenAiTts({ model, input: text, credentials, responseFormat });
    },
    requiresCredentials: true,
  },
  "openrouter": {
    // Delegates fully; returns a complete response result, not { base64 }.
    synthesize: async (text, model, credentials, responseFormat) => {
      return await handleOpenRouterTts({ model, input: text, credentials, responseFormat });
    },
    requiresCredentials: true,
  },
};
|
||||
|
||||
// ── Generic dispatcher: providers with ttsConfig.format ────────────────
// Looks up the provider's ttsConfig, picks the matching format handler, and
// invokes it. Returns null when the provider has no usable config/handler;
// throws when auth is required but no API key was supplied.
async function synthesizeViaConfig(provider, text, model, credentials) {
  const { AI_PROVIDERS } = await import("@/shared/constants/providers");
  const ttsConfig = AI_PROVIDERS[provider]?.ttsConfig;
  const handler = ttsConfig ? FORMAT_HANDLERS[ttsConfig.format] : undefined;
  if (!handler) return null;

  const apiKey = credentials?.apiKey;
  const needsAuth = ttsConfig.authType !== "none";
  if (needsAuth && !apiKey) throw new Error(`${provider} API key required`);

  const knownModels = ttsConfig.models || [];
  const fallbackModel = ttsConfig.models?.[0]?.id || "";
  const { modelId, voiceId } = parseModelVoice(model, fallbackModel, "", knownModels);
  return handler({ baseUrl: ttsConfig.baseUrl, apiKey, text, modelId, voiceId });
}
|
||||
|
||||
// ── Core handler ───────────────────────────────────────────────
|
||||
/**
|
||||
* Synthesize text to audio.
|
||||
* Synthesize text to audio. Provider logic lives in `./ttsProviders/{id}.js`
|
||||
* or is dispatched generically via `ttsConfig.format`.
|
||||
*
|
||||
* @returns {Promise<{success, response, status?, error?}>}
|
||||
*/
|
||||
export async function handleTtsCore({ provider, model, input, credentials, responseFormat = "mp3" }) {
|
||||
@@ -730,17 +53,17 @@ export async function handleTtsCore({ provider, model, input, credentials, respo
|
||||
return createErrorResult(HTTP_STATUS.BAD_REQUEST, "Missing required field: input");
|
||||
}
|
||||
|
||||
const ttsProvider = TTS_PROVIDERS[provider];
|
||||
|
||||
try {
|
||||
// Legacy/special providers (google-tts, edge-tts, local-device, elevenlabs, openai, openrouter)
|
||||
if (ttsProvider) {
|
||||
const result = await ttsProvider.synthesize(input.trim(), model, credentials, responseFormat);
|
||||
// Special-case adapters (google-tts, edge-tts, local-device, elevenlabs, openai, openrouter)
|
||||
const adapter = getTtsAdapter(provider);
|
||||
if (adapter) {
|
||||
const result = await adapter.synthesize(input.trim(), model, credentials, responseFormat);
|
||||
// Adapter may return a full {success, response} (legacy) or {base64, format}
|
||||
if (result.success !== undefined) return result;
|
||||
return createTtsResponse(result.base64, result.format, responseFormat);
|
||||
}
|
||||
|
||||
// Generic config-driven dispatcher (hyperbolic, deepgram, nvidia, huggingface, inworld, cartesia, playht, coqui, tortoise, qwen, ...)
|
||||
// Generic config-driven (hyperbolic, deepgram, nvidia, huggingface, inworld, cartesia, playht, coqui, tortoise, qwen, ...)
|
||||
const result = await synthesizeViaConfig(provider, input.trim(), model, credentials);
|
||||
if (result) return createTtsResponse(result.base64, result.format, responseFormat);
|
||||
|
||||
|
||||
39
open-sse/handlers/ttsProviders/_base.js
Normal file
39
open-sse/handlers/ttsProviders/_base.js
Normal file
@@ -0,0 +1,39 @@
|
||||
// Shared TTS helpers
|
||||
import { Buffer } from "node:buffer";
|
||||
|
||||
// Desktop Chrome user-agent sent to the scraped endpoints (Bing/Google) so
// they serve the same responses as a regular browser session.
export const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36";
|
||||
|
||||
// Convert an upstream Response carrying binary audio into { base64, format }.
// The format is sniffed from the Content-Type header (wav/mp3/ogg), falling
// back to `defaultFormat`; bodies under 100 bytes are treated as empty.
export async function responseToBase64(res, defaultFormat = "mp3") {
  const payload = await res.arrayBuffer();
  if (payload.byteLength < 100) throw new Error("Upstream returned empty audio");

  const contentType = res.headers.get("content-type") || "";
  // Ordered sniff table: first matching needle wins (wav before mpeg/mp3/ogg).
  const sniff = [
    ["wav", "wav"],
    ["mpeg", "mp3"],
    ["mp3", "mp3"],
    ["ogg", "ogg"],
  ];
  const hit = sniff.find(([needle]) => contentType.includes(needle));
  const format = hit ? hit[1] : defaultFormat;

  return { base64: Buffer.from(payload).toString("base64"), format };
}
|
||||
|
||||
// Read an error Response body and throw the most specific message available.
// Tries common JSON error shapes (error.message / message / detail.message /
// string detail); falls back to the raw text, then to "Upstream error (N)".
export async function throwUpstreamError(res) {
  const raw = await res.text().catch(() => "");
  const fallback = `Upstream error (${res.status})`;
  let message;
  try {
    const body = JSON.parse(raw);
    const stringDetail = typeof body?.detail === "string" ? body.detail : null;
    message =
      body?.error?.message ||
      body?.message ||
      body?.detail?.message ||
      stringDetail ||
      raw ||
      fallback;
  } catch {
    // Body was not JSON — surface it verbatim when non-empty.
    message = raw || fallback;
  }
  throw new Error(message);
}
|
||||
|
||||
// Parse `model` as "modelId/voiceId". Known model ids are matched first
// (longest id wins, so ids containing "/" resolve correctly); otherwise the
// string is split at its last "/"; a bare string falls back to the defaults.
export function parseModelVoice(model, defaultModel = "", defaultVoice = "", knownModels = []) {
  if (!model) return { modelId: defaultModel, voiceId: defaultVoice };

  // Normalize entries ({ id } objects or plain strings) and try longest first.
  const candidates = knownModels
    .map((entry) => entry.id || entry)
    .filter(Boolean)
    .sort((a, b) => b.length - a.length);

  for (const candidate of candidates) {
    if (model === candidate) return { modelId: candidate, voiceId: defaultVoice };
    const prefix = `${candidate}/`;
    if (model.startsWith(prefix)) {
      return { modelId: candidate, voiceId: model.slice(prefix.length) };
    }
  }

  const slash = model.lastIndexOf("/");
  if (slash > 0) {
    return { modelId: model.slice(0, slash), voiceId: model.slice(slash + 1) };
  }
  // No separator: use defaults where provided, else reuse the raw string.
  return { modelId: defaultModel || model, voiceId: defaultVoice || model };
}
|
||||
89
open-sse/handlers/ttsProviders/edgeTts.js
Normal file
89
open-sse/handlers/ttsProviders/edgeTts.js
Normal file
@@ -0,0 +1,89 @@
|
||||
// Microsoft Edge / Bing TTS (no auth) — via Bing translator endpoint
|
||||
import { Buffer } from "node:buffer";
|
||||
import { UA } from "./_base.js";
|
||||
|
||||
const REFRESH_MS = 5 * 60 * 1000; // token TTL ~1h, refresh early
const VOICES_TTL = 24 * 60 * 60 * 1000; // voice-list cache lifetime: 24h

// Module-level caches: scraped Bing token (+ timestamp) and voice list.
const cache = { token: null, tokenTime: 0 };
let _voicesCache = null;
let _voicesCacheTime = 0;
|
||||
|
||||
// Scrape a TTS auth token (key + token + session cookies) from the Bing
// translator page, caching it for REFRESH_MS.
// NOTE(review): relies on the params_AbusePreventionHelper inline script —
// brittle if Bing changes its page markup.
async function getToken() {
  const now = Date.now();
  if (cache.token && now - cache.tokenTime < REFRESH_MS) return cache.token;
  const res = await fetch("https://www.bing.com/translator", {
    headers: { "User-Agent": UA, "Accept-Language": "vi,en-US;q=0.9,en;q=0.8" },
  });
  if (!res.ok) throw new Error(`Bing translator fetch failed: ${res.status}`);
  // Collect session cookies so the later TTS POST is accepted.
  const rawCookies = res.headers.getSetCookie?.() || [];
  const cookie = rawCookies.map((c) => c.split(";")[0]).join("; ");
  const html = await res.text();
  // params_AbusePreventionHelper = [key, "token", ...]
  const match = html.match(/params_AbusePreventionHelper\s*=\s*\[([^,]+),([^,]+),/);
  if (!match) throw new Error("Failed to parse Bing token");
  cache.token = { key: match[1], token: match[2].replace(/"/g, ""), cookie };
  cache.tokenTime = now;
  return cache.token;
}
|
||||
|
||||
// XML-escape user text so characters like & and < cannot break (or inject
// markup into) the SSML document sent to Bing.
function escapeSsmlText(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}

// Build SSML for `voiceId` and POST it to the Bing translator TTS endpoint.
// Returns the raw fetch Response; the caller checks status and reads audio.
async function ttsRequest(text, voiceId, token) {
  const parts = voiceId.split("-");
  const xmlLang = parts.slice(0, 2).join("-"); // e.g. "vi-VN" from "vi-VN-HoaiMyNeural"
  // Check "female" before "male": "female".includes("male") is true, so a
  // bare includes("male") test misclassified every *Female* voice as Male.
  const lower = voiceId.toLowerCase();
  const gender = lower.includes("female") ? "Female" : lower.includes("male") ? "Male" : "Female";
  const ssml = `<speak version='1.0' xml:lang='${xmlLang}'><voice xml:lang='${xmlLang}' xml:gender='${gender}' name='${voiceId}'><prosody rate='0.00%'>${escapeSsmlText(text)}</prosody></voice></speak>`;
  const body = new URLSearchParams();
  body.append("ssml", ssml);
  body.append("token", token.token);
  body.append("key", token.key);
  return fetch("https://www.bing.com/tfettts?isVertical=1&&IG=1&IID=translator.5023&SFX=1", {
    method: "POST",
    body: body.toString(),
    headers: {
      "Content-Type": "application/x-www-form-urlencoded",
      "Accept": "*/*",
      "Origin": "https://www.bing.com",
      "Referer": "https://www.bing.com/translator",
      "User-Agent": UA,
      ...(token.cookie ? { "Cookie": token.cookie } : {}),
    },
  });
}
|
||||
|
||||
// Fetch the Edge Read-Aloud voice list (public trusted-client token),
// caching the result for VOICES_TTL.
export async function fetchEdgeTtsVoices() {
  const now = Date.now();
  if (_voicesCache && now - _voicesCacheTime < VOICES_TTL) return _voicesCache;
  const res = await fetch(
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4",
    { headers: { "User-Agent": UA } }
  );
  if (!res.ok) throw new Error(`Edge TTS voices fetch failed: ${res.status}`);
  const voices = await res.json();
  _voicesCache = voices;
  _voicesCacheTime = now;
  return voices;
}
|
||||
|
||||
// Edge/Bing TTS adapter: synthesize(text, model) → { base64, format: "mp3" }.
// `model` is the voice name; no credentials required.
export default {
  noAuth: true,
  async synthesize(text, model) {
    const voiceId = model || "vi-VN-HoaiMyNeural";
    let token = await getToken();
    let res = await ttsRequest(text, voiceId, token);

    // 429/403: invalidate cache and retry once
    if (res.status === 429 || res.status === 403) {
      cache.token = null;
      cache.tokenTime = 0;
      token = await getToken();
      res = await ttsRequest(text, voiceId, token);
    }

    if (!res.ok) {
      const body = await res.text().catch(() => "");
      throw new Error(`Bing TTS failed: ${res.status}${body ? " - " + body : ""}`);
    }
    const buf = await res.arrayBuffer();
    // Guard against empty/placeholder responses (valid audio is > 1 KiB here).
    if (buf.byteLength < 1024) throw new Error("Bing TTS returned empty audio");
    return { base64: Buffer.from(buf).toString("base64"), format: "mp3" };
  },
};
|
||||
48
open-sse/handlers/ttsProviders/elevenlabs.js
Normal file
48
open-sse/handlers/ttsProviders/elevenlabs.js
Normal file
@@ -0,0 +1,48 @@
|
||||
// ElevenLabs TTS — voice id with optional model_id prefix
|
||||
import { Buffer } from "node:buffer";
|
||||
|
||||
const VOICES_TTL = 24 * 60 * 60 * 1000; // voice-list cache lifetime: 24h
const _voicesCache = new Map(); // by API key

// Fetch (and cache per API key) the account's ElevenLabs voice list.
// Each voice gains a `lang` field derived from its labels for UI grouping.
// @throws when no apiKey is given or the upstream request fails.
export async function fetchElevenLabsVoices(apiKey) {
  if (!apiKey) throw new Error("ElevenLabs API key required");
  const now = Date.now();
  const cached = _voicesCache.get(apiKey);
  if (cached && now - cached.time < VOICES_TTL) return cached.voices;

  const res = await fetch("https://api.elevenlabs.io/v1/voices", {
    headers: { "xi-api-key": apiKey, "Content-Type": "application/json" },
  });
  if (!res.ok) throw new Error(`ElevenLabs voices fetch failed: ${res.status}`);
  const data = await res.json();
  // Normalize: derive lang from labels for grouping
  const voices = (data.voices || []).map((v) => ({ ...v, lang: v.labels?.language || "en" }));
  _voicesCache.set(apiKey, { voices, time: now });
  return voices;
}
|
||||
|
||||
// ElevenLabs TTS adapter. `model` is a voice id, optionally prefixed with a
// model id as "modelId/voiceId"; returns { base64, format: "mp3" }.
export default {
  async synthesize(text, model, credentials) {
    if (!credentials?.apiKey) throw new Error("ElevenLabs API key required");
    let modelId = "eleven_flash_v2_5";
    let voiceId = model;
    // NOTE(review): split("/") keeps only the first two segments — verify
    // ElevenLabs voice ids can never themselves contain "/".
    if (model && model.includes("/")) [modelId, voiceId] = model.split("/");

    const res = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, {
      method: "POST",
      headers: { "xi-api-key": credentials.apiKey, "Content-Type": "application/json" },
      body: JSON.stringify({
        text,
        model_id: modelId,
        voice_settings: { stability: 0.5, similarity_boost: 0.75 },
      }),
    });
    if (!res.ok) {
      const err = await res.json().catch(() => ({}));
      throw new Error(err?.detail?.message || `ElevenLabs TTS failed: ${res.status}`);
    }
    const buf = await res.arrayBuffer();
    // Guard against empty/placeholder responses (valid audio is > 1 KiB here).
    if (buf.byteLength < 1024) throw new Error("ElevenLabs TTS returned empty audio");
    return { base64: Buffer.from(buf).toString("base64"), format: "mp3" };
  },
};
|
||||
167
open-sse/handlers/ttsProviders/genericFormats.js
Normal file
167
open-sse/handlers/ttsProviders/genericFormats.js
Normal file
@@ -0,0 +1,167 @@
|
||||
// Generic config-driven TTS handlers — dispatched by ttsConfig.format.
|
||||
// Each handler accepts { baseUrl, apiKey, text, modelId, voiceId } and returns { base64, format }.
|
||||
import { responseToBase64, throwUpstreamError } from "./_base.js";
|
||||
|
||||
// Hyperbolic: POST { text } → { audio: base64 }
// Bearer auth; the response is JSON carrying base64 audio directly.
async function hyperbolic({ baseUrl, apiKey, text }) {
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) await throwUpstreamError(res);
  const data = await res.json();
  return { base64: data.audio, format: "mp3" };
}
|
||||
|
||||
// Deepgram: model via query, Token auth, returns binary
async function deepgram({ baseUrl, apiKey, text, modelId }) {
  // Model is selected via the ?model= query parameter, not the body.
  const url = new URL(baseUrl);
  url.searchParams.set("model", modelId || "aura-asteria-en");
  const res = await fetch(url.toString(), {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Token ${apiKey}` },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
||||
|
||||
// Nvidia NIM: POST { input: { text }, voice, model } → binary
async function nvidia({ baseUrl, apiKey, text, modelId, voiceId }) {
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
    body: JSON.stringify({ input: { text }, voice: voiceId || "default", model: modelId }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
||||
|
||||
// HuggingFace: POST {baseUrl}/{modelId} { inputs: text } → binary
async function huggingface({ baseUrl, apiKey, text, modelId }) {
  // modelId is interpolated into the URL path — reject ".." traversal.
  // NOTE(review): other path metacharacters are not filtered; confirm modelId
  // always comes from a trusted model list.
  if (!modelId || modelId.includes("..")) throw new Error("Invalid HuggingFace model ID");
  const res = await fetch(`${baseUrl}/${modelId}`, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
    body: JSON.stringify({ inputs: text }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
||||
|
||||
// Inworld: Basic auth, JSON { audioContent }
// apiKey is expected to already be the base64 "Basic" credential value.
async function inworld({ baseUrl, apiKey, text, modelId, voiceId }) {
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Basic ${apiKey}` },
    body: JSON.stringify({
      text,
      voiceId: voiceId || "Alex",
      modelId: modelId || "inworld-tts-1.5-mini",
      audioConfig: { audioEncoding: "MP3" },
    }),
  });
  if (!res.ok) await throwUpstreamError(res);
  const data = await res.json();
  // Audio arrives as base64 inside the JSON body, not as binary.
  if (!data.audioContent) throw new Error("Inworld TTS returned no audio");
  return { base64: data.audioContent, format: "mp3" };
}
|
||||
|
||||
// Cartesia: X-API-Key header
// voiceId is optional; when present it is wrapped in Cartesia's
// { mode: "id", id } voice descriptor.
async function cartesia({ baseUrl, apiKey, text, modelId, voiceId }) {
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "X-API-Key": apiKey,
      "Cartesia-Version": "2024-06-10", // pinned API version
    },
    body: JSON.stringify({
      model_id: modelId || "sonic-2",
      transcript: text,
      ...(voiceId ? { voice: { mode: "id", id: voiceId } } : {}),
      output_format: { container: "mp3", bit_rate: 128000, sample_rate: 44100 },
    }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
||||
|
||||
// PlayHT: token format "userId:apiKey", voice = s3 URL
// Splits credentials only on the FIRST colon so secret keys that themselves
// contain ":" are kept intact (the old `split(":")` destructuring silently
// truncated everything after the second colon). A colon-less apiKey keeps the
// old behavior: it is used both as X-USER-ID and as the bearer token.
async function playht({ baseUrl, apiKey, text, modelId, voiceId }) {
  const raw = apiKey || ":";
  const sep = raw.indexOf(":");
  const userId = sep === -1 ? raw : raw.slice(0, sep);
  const key = sep === -1 ? "" : raw.slice(sep + 1);
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Accept": "audio/mpeg",
      "X-USER-ID": userId || "",
      "Authorization": `Bearer ${key || apiKey}`,
    },
    body: JSON.stringify({
      text,
      voice: voiceId || "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
      voice_engine: modelId || "PlayDialog",
      output_format: "mp3",
      speed: 1,
    }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
||||
|
||||
// Coqui (local, noAuth): POST { text, speaker_id } → WAV
async function coqui({ baseUrl, text, voiceId }) {
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    // speaker_id is omitted entirely when no voice was selected.
    body: JSON.stringify({ text, ...(voiceId ? { speaker_id: voiceId } : {}) }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
||||
|
||||
// Tortoise (local, noAuth): POST { text, voice } → WAV
async function tortoise({ baseUrl, text, voiceId }) {
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ text, voice: voiceId || "random" }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
||||
|
||||
// OpenAI-compatible upstream (qwen3-tts, etc.)
// Sends an OpenAI /audio/speech-shaped request; Authorization is only added
// when an apiKey is present (some compat servers are unauthenticated).
async function openaiCompat({ baseUrl, apiKey, text, modelId, voiceId }) {
  const headers = { "Content-Type": "application/json" };
  if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
  const res = await fetch(baseUrl, {
    method: "POST",
    headers,
    body: JSON.stringify({
      model: modelId,
      input: text,
      voice: voiceId || "alloy",
      response_format: "mp3",
      speed: 1.0,
    }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
||||
|
||||
// format → handler dispatcher
// Keys match ttsConfig.format values declared in the provider constants.
export const FORMAT_HANDLERS = {
  hyperbolic,
  deepgram,
  "nvidia-tts": nvidia,
  "huggingface-tts": huggingface,
  inworld,
  cartesia,
  playht,
  coqui,
  tortoise,
  openai: openaiCompat, // any OpenAI-compatible /audio/speech endpoint
};
|
||||
54
open-sse/handlers/ttsProviders/googleTts.js
Normal file
54
open-sse/handlers/ttsProviders/googleTts.js
Normal file
@@ -0,0 +1,54 @@
|
||||
// Google Translate TTS (no auth) — scrape token + batchexecute RPC
|
||||
import { UA } from "./_base.js";
|
||||
|
||||
const REFRESH_MS = 11 * 60 * 1000; // scraped token lifetime before re-fetch
// Module state: cached page token and a counter used to build request ids.
const cache = { token: null, tokenTime: 0 };
let _idx = 0;

// Scrape the batchexecute session parameters ("f.sid" and "bl") from the
// Google Translate page, caching them for REFRESH_MS.
// NOTE(review): depends on inline page constants FdrFJe/cfb2h — brittle if
// Google changes its page markup.
async function getToken() {
  const now = Date.now();
  if (cache.token && now - cache.tokenTime < REFRESH_MS) return cache.token;
  const res = await fetch("https://translate.google.com/", { headers: { "User-Agent": UA } });
  if (!res.ok) throw new Error(`Google translate fetch failed: ${res.status}`);
  const html = await res.text();
  const fSid = html.match(/"FdrFJe":"(.*?)"/)?.[1];
  const bl = html.match(/"cfb2h":"(.*?)"/)?.[1];
  if (!fSid || !bl) throw new Error("Failed to parse Google token");
  cache.token = { "f.sid": fSid, bl };
  cache.tokenTime = now;
  return cache.token;
}
|
||||
|
||||
// Google Translate TTS adapter: `model` is the language code (default "en").
// Calls the private batchexecute RPC "jQ1olc" and extracts base64 MP3 from
// its framed response.
export default {
  noAuth: true,
  async synthesize(text, model) {
    const lang = model || "en";
    const token = await getToken();
    // Strip characters the endpoint mishandles and soften comma pauses.
    const cleanText = text.replace(/[@^*()\\/\-_+=><"'\u201c\u201d\u3010\u3011]/g, " ").replaceAll(", ", ". ");
    const rpcId = "jQ1olc";
    // Monotonic-ish request id, mimicking the web client's _reqid scheme.
    const reqId = (++_idx * 100000) + Math.floor(1000 + Math.random() * 9000);
    const query = new URLSearchParams({
      rpcids: rpcId,
      "f.sid": token["f.sid"],
      bl: token.bl,
      hl: lang,
      "soc-app": 1, "soc-platform": 1, "soc-device": 1,
      _reqid: reqId,
      rt: "c",
    });
    const payload = [cleanText, lang, null, "undefined", [0]];
    const body = new URLSearchParams();
    body.append("f.req", JSON.stringify([[[rpcId, JSON.stringify(payload), null, "generic"]]]));
    const res = await fetch(`https://translate.google.com/_/TranslateWebserverUi/data/batchexecute?${query}`, {
      method: "POST",
      headers: { "Content-Type": "application/x-www-form-urlencoded", "Referer": "https://translate.google.com/" },
      body: body.toString(),
    });
    if (!res.ok) throw new Error(`Google TTS failed: ${res.status}`);
    const data = await res.text();
    // Response is a length-framed stream; the payload sits on line 4.
    // NOTE(review): fixed line index is brittle against format changes.
    const split = JSON.parse(data.split("\n")[3]);
    const base64 = JSON.parse(split[0][2])[0];
    if (!base64 || base64.length < 100) throw new Error("Google TTS returned empty audio");
    return { base64, format: "mp3" };
  },
};
|
||||
47
open-sse/handlers/ttsProviders/index.js
Normal file
47
open-sse/handlers/ttsProviders/index.js
Normal file
@@ -0,0 +1,47 @@
|
||||
// TTS provider registry
|
||||
import googleTts from "./googleTts.js";
|
||||
import edgeTts, { fetchEdgeTtsVoices } from "./edgeTts.js";
|
||||
import localDevice, { fetchLocalDeviceVoices } from "./localDevice.js";
|
||||
import elevenlabs, { fetchElevenLabsVoices } from "./elevenlabs.js";
|
||||
import openai from "./openai.js";
|
||||
import openrouter from "./openrouter.js";
|
||||
import { FORMAT_HANDLERS } from "./genericFormats.js";
|
||||
import { parseModelVoice } from "./_base.js";
|
||||
|
||||
// Special providers with custom synthesize() logic
// (everything else is dispatched generically through FORMAT_HANDLERS).
const SPECIAL_ADAPTERS = {
  "google-tts": googleTts,
  "edge-tts": edgeTts,
  "local-device": localDevice,
  elevenlabs,
  openai,
  openrouter,
};
|
||||
|
||||
// Look up a special-case TTS adapter by provider id; null when none exists.
// Object.hasOwn guards against inherited Object.prototype keys (e.g. a
// provider string of "toString"), which a plain property read would have
// returned as if it were an adapter.
export function getTtsAdapter(provider) {
  return Object.hasOwn(SPECIAL_ADAPTERS, provider) ? SPECIAL_ADAPTERS[provider] : null;
}
|
||||
|
||||
// Generic config-driven dispatcher (uses ttsConfig.format)
// Returns null when the provider has no ttsConfig or no matching format
// handler; throws when auth is required but no API key was supplied.
export async function synthesizeViaConfig(provider, text, model, credentials) {
  const { AI_PROVIDERS } = await import("@/shared/constants/providers");
  const cfg = AI_PROVIDERS[provider]?.ttsConfig;
  if (!cfg) return null;
  const handler = FORMAT_HANDLERS[cfg.format];
  if (!handler) return null;
  const apiKey = credentials?.apiKey;
  if (cfg.authType !== "none" && !apiKey) throw new Error(`${provider} API key required`);
  // First configured model is the default; "model/voice" strings are split
  // against the known model list.
  const defaultModel = cfg.models?.[0]?.id || "";
  const { modelId, voiceId } = parseModelVoice(model, defaultModel, "", cfg.models || []);
  return handler({ baseUrl: cfg.baseUrl, apiKey, text, modelId, voiceId });
}
|
||||
|
||||
// Voice fetchers (used by /api/media-providers/tts/voices route)
// Only providers with a dynamic voice list are present here.
export const VOICE_FETCHERS = {
  "edge-tts": fetchEdgeTtsVoices,
  "local-device": fetchLocalDeviceVoices,
  elevenlabs: fetchElevenLabsVoices,
};

// Re-export for backward compat
export { fetchEdgeTtsVoices, fetchLocalDeviceVoices, fetchElevenLabsVoices };
|
||||
87
open-sse/handlers/ttsProviders/localDevice.js
Normal file
87
open-sse/handlers/ttsProviders/localDevice.js
Normal file
@@ -0,0 +1,87 @@
|
||||
// Local device TTS — macOS `say` + Windows SAPI + ffmpeg
|
||||
import { execFile } from "node:child_process";
|
||||
import { promisify } from "node:util";
|
||||
import { mkdtemp, readFile, rm } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
|
||||
const execFileAsync = promisify(execFile);

// Process-lifetime voice-list cache (installed voices rarely change).
let _voicesCache = null;

// List macOS voices by parsing `say -v ?` output.
// Each line looks like: "Name      lang_REGION    # sample sentence".
// NOTE(review): the regex requires a lang_REGION locale — voices whose
// locale formats differently would be skipped; confirm acceptable.
async function fetchVoicesMac() {
  const { stdout } = await execFileAsync("say", ["-v", "?"]);
  const voices = [];
  for (const line of stdout.split("\n")) {
    const m = line.match(/^([^\s].*?)\s{2,}([a-z]{2}_[A-Z]{2})/);
    if (!m) continue;
    const name = m[1].trim();
    const locale = m[2].trim();
    const lang = locale.split("_")[0];
    const country = locale.split("_")[1];
    voices.push({ id: name, name, locale, lang, country, gender: "" });
  }
  return voices;
}
|
||||
|
||||
// List Windows voices via PowerShell + System.Speech, normalized to the
// same shape as fetchVoicesMac ({ id, name, locale, lang, country, gender }).
async function fetchVoicesWin() {
  const script = [
    "Add-Type -AssemblyName System.Speech;",
    "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
    "$s.GetInstalledVoices() | ForEach-Object { $v = $_.VoiceInfo;",
    "[PSCustomObject]@{ Name=$v.Name; Culture=$v.Culture.Name; Gender=$v.Gender } }",
    "| ConvertTo-Json -Compress",
  ].join(" ");
  const { stdout } = await execFileAsync(
    "powershell.exe",
    ["-NoProfile", "-NonInteractive", "-WindowStyle", "Hidden", "-Command", script],
    { windowsHide: true }
  );
  // ConvertTo-Json emits a bare object (not an array) for a single voice.
  const raw = JSON.parse(stdout.trim() || "[]");
  const list = Array.isArray(raw) ? raw : [raw];
  return list.map((v) => {
    const culture = v.Culture || "en-US";
    const [lang, country = ""] = culture.split("-");
    // Gender may serialize as an enum number (1/2) or a string.
    const genderMap = { 1: "Male", 2: "Female", Male: "Male", Female: "Female" };
    return {
      id: v.Name, name: v.Name,
      locale: culture.replace("-", "_"),
      lang, country,
      gender: genderMap[v.Gender] || "",
    };
  });
}
|
||||
|
||||
// List the host OS voices (Windows SAPI or macOS `say`), caching the result
// for the process lifetime. Any enumeration failure yields an empty list.
export async function fetchLocalDeviceVoices() {
  if (_voicesCache) return _voicesCache;
  try {
    const onWindows = process.platform === "win32";
    _voicesCache = onWindows ? await fetchVoicesWin() : await fetchVoicesMac();
    return _voicesCache;
  } catch {
    // Voice listing is best-effort; an empty list just disables selection.
    return [];
  }
}
|
||||
|
||||
// Render `text` to a temp audio file with the OS speech engine, transcode to
// MP3 via ffmpeg, and return the MP3 as base64. The temp dir is always
// removed. The original only implemented the macOS `say` branch despite the
// module header promising Windows SAPI; a PowerShell System.Speech branch is
// added so "local-device" works on Windows too.
async function synthesizeMacOrWin(text, voiceId) {
  const dir = await mkdtemp(join(tmpdir(), "tts-"));
  const isWindows = process.platform === "win32";
  const rawPath = join(dir, isWindows ? "out.wav" : "out.aiff");
  const mp3Path = join(dir, "out.mp3");
  // Single-quote PowerShell literals ('' escapes a quote) so text cannot be
  // interpreted as script ($-interpolation only happens in double quotes).
  const psQuote = (s) => `'${String(s).replace(/'/g, "''")}'`;
  try {
    if (isWindows) {
      const script = [
        "Add-Type -AssemblyName System.Speech;",
        "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer;",
        ...(voiceId ? [`$s.SelectVoice(${psQuote(voiceId)});`] : []),
        `$s.SetOutputToWaveFile(${psQuote(rawPath)});`,
        `$s.Speak(${psQuote(text)});`,
        "$s.Dispose();",
      ].join(" ");
      await execFileAsync(
        "powershell.exe",
        ["-NoProfile", "-NonInteractive", "-WindowStyle", "Hidden", "-Command", script],
        { windowsHide: true }
      );
    } else {
      // macOS: `say` renders AIFF; voice is optional.
      const args = voiceId ? ["-v", voiceId, "-o", rawPath, text] : ["-o", rawPath, text];
      await execFileAsync("say", args);
    }
    await execFileAsync("ffmpeg", ["-y", "-i", rawPath, "-codec:a", "libmp3lame", "-qscale:a", "4", mp3Path]);
    const buf = await readFile(mp3Path);
    return buf.toString("base64");
  } finally {
    await rm(dir, { recursive: true, force: true });
  }
}
|
||||
|
||||
// Local-device TTS adapter: `model` is the OS voice name; no credentials.
export default {
  noAuth: true,
  async synthesize(text, model) {
    const base64 = await synthesizeMacOrWin(text, model);
    return { base64, format: "mp3" };
  },
};
|
||||
30
open-sse/handlers/ttsProviders/openai.js
Normal file
30
open-sse/handlers/ttsProviders/openai.js
Normal file
@@ -0,0 +1,30 @@
|
||||
// OpenAI TTS — model format: "tts-model/voice"
|
||||
import { Buffer } from "node:buffer";
|
||||
|
||||
// OpenAI TTS adapter. `model` is "tts-model/voice" (e.g.
// "gpt-4o-mini-tts/alloy") or a bare voice name; defaults apply otherwise.
export default {
  async synthesize(text, model, credentials) {
    if (!credentials?.apiKey) throw new Error("No OpenAI API key configured");

    let ttsModel = "gpt-4o-mini-tts";
    let voice = "alloy";
    if (model && model.includes("/")) {
      const parts = model.split("/");
      // NOTE(review): a model string with more than one "/" silently falls
      // back to the defaults — confirm that is intended.
      if (parts.length === 2) [ttsModel, voice] = parts;
    } else if (model) {
      voice = model;
    }

    // credentials.baseUrl lets this target any OpenAI-compatible host.
    const baseUrl = (credentials.baseUrl || "https://api.openai.com").replace(/\/+$/, "");
    const res = await fetch(`${baseUrl}/v1/audio/speech`, {
      method: "POST",
      headers: { "Content-Type": "application/json", "Authorization": `Bearer ${credentials.apiKey}` },
      body: JSON.stringify({ model: ttsModel, voice, input: text }),
    });
    if (!res.ok) {
      const err = await res.json().catch(() => ({}));
      throw new Error(err?.error?.message || `OpenAI TTS failed: ${res.status}`);
    }
    const buf = await res.arrayBuffer();
    return { base64: Buffer.from(buf).toString("base64"), format: "mp3" };
  },
};
|
||||
70
open-sse/handlers/ttsProviders/openrouter.js
Normal file
70
open-sse/handlers/ttsProviders/openrouter.js
Normal file
@@ -0,0 +1,70 @@
|
||||
// OpenRouter TTS — via chat completions + audio modality (SSE stream)
// Streams a chat completion with modalities ["text","audio"], concatenating
// the base64 audio chunks from each delta into one WAV payload.
export default {
  async synthesize(text, model, credentials) {
    if (!credentials?.apiKey) throw new Error("No OpenRouter API key configured");

    // model format: "tts-model/voice" e.g. "openai/gpt-4o-mini-tts/alloy"
    // OpenRouter model ids themselves contain "/", so only the LAST segment
    // can be the voice — and only when what precedes it still contains "/".
    let ttsModel = "openai/gpt-4o-mini-tts";
    let voice = "alloy";
    if (model && model.includes("/")) {
      const lastSlash = model.lastIndexOf("/");
      const maybVoice = model.slice(lastSlash + 1);
      const maybeModel = model.slice(0, lastSlash);
      if (maybeModel.includes("/")) {
        ttsModel = maybVoice && maybeModel ? maybeModel : ttsModel;
        voice = maybVoice;
      } else {
        // NOTE(review): single-slash input sets voice to the WHOLE string
        // (slash included) and keeps the default model — this looks
        // unintentional; confirm the expected handling of e.g. "a/b".
        voice = model;
      }
    } else if (model) {
      voice = model;
    }

    const res = await fetch("https://openrouter.ai/api/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${credentials.apiKey}`,
        "HTTP-Referer": "https://endpoint-proxy.local",
        "X-Title": "Endpoint Proxy",
      },
      body: JSON.stringify({
        model: ttsModel,
        modalities: ["text", "audio"],
        audio: { voice, format: "wav" },
        stream: true,
        messages: [{ role: "user", content: text }],
      }),
    });

    if (!res.ok) {
      const err = await res.json().catch(() => ({}));
      throw new Error(err?.error?.message || `OpenRouter TTS failed: ${res.status}`);
    }

    // Parse SSE stream, accumulate base64 audio chunks
    const chunks = [];
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // Keep the trailing partial line in the buffer for the next read.
      buffer = lines.pop();
      for (const line of lines) {
        if (!line.startsWith("data: ") || line === "data: [DONE]") continue;
        try {
          const json = JSON.parse(line.slice(6));
          const audioData = json.choices?.[0]?.delta?.audio?.data;
          if (audioData) chunks.push(audioData);
        } catch {}
      }
    }

    if (chunks.length === 0) throw new Error("OpenRouter TTS returned no audio data");
    return { base64: chunks.join(""), format: "wav" };
  },
};
|
||||
@@ -67,6 +67,23 @@ const ALIAS_TO_PROVIDER_ID = {
|
||||
"xiaomi-mimo": "xiaomi-mimo",
|
||||
cf: "cloudflare-ai",
|
||||
"cloudflare-ai": "cloudflare-ai",
|
||||
// Image/video providers
|
||||
fal: "fal-ai",
|
||||
"fal-ai": "fal-ai",
|
||||
stability: "stability-ai",
|
||||
"stability-ai": "stability-ai",
|
||||
bfl: "black-forest-labs",
|
||||
"black-forest-labs": "black-forest-labs",
|
||||
recraft: "recraft",
|
||||
topaz: "topaz",
|
||||
runway: "runwayml",
|
||||
runwayml: "runwayml",
|
||||
// Embedding/rerank
|
||||
jina: "jina-ai",
|
||||
"jina-ai": "jina-ai",
|
||||
// TTS
|
||||
polly: "aws-polly",
|
||||
"aws-polly": "aws-polly",
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user