Update version to 0.4.9, enhance README with a Trendshift badge, and add new embedding models to providerModels.js. Refactor TTS handling to support additional providers and improve API key validation for media providers.

decolua
2026-04-29 11:34:39 +07:00
parent e8aa5e2222
commit 512e3de371
20 changed files with 586 additions and 83 deletions

View File

@@ -105,6 +105,9 @@ export const PROVIDER_MODELS = {
{ id: "grok-code-fast-1", name: "Grok Code Fast 1" },
{ id: "oswe-vscode-prime", name: "Raptor Mini" },
{ id: "goldeneye-free-auto", name: "GoldenEye" },
// GitHub Copilot - Embedding models
{ id: "text-embedding-3-small", name: "Text Embedding 3 Small (GitHub)", type: "embedding" },
{ id: "text-embedding-3-large", name: "Text Embedding 3 Large (GitHub)", type: "embedding" },
],
kr: [ // Kiro AI
// { id: "claude-opus-4.5", name: "Claude Opus 4.5" },
@@ -378,6 +381,7 @@ export const PROVIDER_MODELS = {
{ id: "mistral-large-latest", name: "Mistral Large 3" },
{ id: "codestral-latest", name: "Codestral" },
{ id: "mistral-medium-latest", name: "Mistral Medium 3" },
{ id: "mistral-embed", name: "Mistral Embed", type: "embedding" },
],
perplexity: [
{ id: "sonar-pro", name: "Sonar Pro" },
@@ -388,11 +392,14 @@ export const PROVIDER_MODELS = {
{ id: "deepseek-ai/DeepSeek-R1", name: "DeepSeek R1" },
{ id: "Qwen/Qwen3-235B-A22B", name: "Qwen3 235B" },
{ id: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", name: "Llama 4 Maverick" },
{ id: "BAAI/bge-large-en-v1.5", name: "BGE Large EN v1.5", type: "embedding" },
{ id: "togethercomputer/m2-bert-80M-8k-retrieval", name: "M2 BERT 80M 8K", type: "embedding" },
],
fireworks: [
{ id: "accounts/fireworks/models/deepseek-v3p1", name: "DeepSeek V3.1" },
{ id: "accounts/fireworks/models/llama-v3p3-70b-instruct", name: "Llama 3.3 70B" },
{ id: "accounts/fireworks/models/qwen3-235b-a22b", name: "Qwen3 235B" },
{ id: "nomic-ai/nomic-embed-text-v1.5", name: "Nomic Embed Text v1.5", type: "embedding" },
],
cerebras: [
{ id: "gpt-oss-120b", name: "GPT OSS 120B" },
@@ -410,9 +417,20 @@ export const PROVIDER_MODELS = {
nvidia: [
{ id: "moonshotai/kimi-k2.5", name: "Kimi K2.5" },
{ id: "z-ai/glm4.7", name: "GLM 4.7" },
{ id: "nvidia/nv-embedqa-e5-v5", name: "NV EmbedQA E5 v5", type: "embedding" },
],
nebius: [
{ id: "meta-llama/Llama-3.3-70B-Instruct", name: "Llama 3.3 70B Instruct" },
{ id: "Qwen/Qwen3-Embedding-8B", name: "Qwen3 Embedding 8B", type: "embedding" },
],
"voyage-ai": [
{ id: "voyage-3-large", name: "Voyage 3 Large", type: "embedding" },
{ id: "voyage-3.5", name: "Voyage 3.5", type: "embedding" },
{ id: "voyage-3.5-lite", name: "Voyage 3.5 Lite", type: "embedding" },
{ id: "voyage-code-3", name: "Voyage Code 3", type: "embedding" },
{ id: "voyage-finance-2", name: "Voyage Finance 2", type: "embedding" },
{ id: "voyage-law-2", name: "Voyage Law 2", type: "embedding" },
{ id: "voyage-multilingual-2", name: "Voyage Multilingual 2", type: "embedding" },
],
siliconflow: [
{ id: "deepseek-ai/DeepSeek-V3.2", name: "DeepSeek V3.2" },

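The new type: "embedding" field lets callers separate chat models from embedding models without a second registry. A minimal sketch of a filter built on it (the helper name is illustrative, not part of this commit):

import { PROVIDER_MODELS } from "./providerModels.js";

// Hypothetical helper: list only the embedding-capable models of a provider.
function getEmbeddingModels(provider) {
  return (PROVIDER_MODELS[provider] || []).filter((m) => m.type === "embedding");
}

getEmbeddingModels("voyage-ai").map((m) => m.id); // → ["voyage-3-large", "voyage-3.5", ...]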
View File

@@ -7,6 +7,19 @@ import { refreshWithRetry } from "../services/tokenRefresh.js";
// Google AI (Gemini) provider aliases / identifiers
const GEMINI_PROVIDERS = new Set(["gemini", "google_ai_studio"]);
// Static map: provider id → embeddings endpoint (OpenAI-compatible body format)
const EMBEDDING_URLS = {
openai: "https://api.openai.com/v1/embeddings",
openrouter: "https://openrouter.ai/api/v1/embeddings",
mistral: "https://api.mistral.ai/v1/embeddings",
"voyage-ai": "https://api.voyageai.com/v1/embeddings",
fireworks: "https://api.fireworks.ai/inference/v1/embeddings",
together: "https://api.together.xyz/v1/embeddings",
nebius: "https://api.tokenfactory.nebius.com/v1/embeddings",
github: "https://models.github.ai/inference/embeddings",
nvidia: "https://integrate.api.nvidia.com/v1/embeddings",
};
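// Example (values from the map above): EMBEDDING_URLS["mistral"] → "https://api.mistral.ai/v1/embeddings";
// providers missing from this map fall through to the baseUrl logic in buildEmbeddingsUrl below.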
/**
* Check whether a provider targets the Google AI (Gemini) embeddings API.
* @param {string} provider
@@ -77,22 +90,16 @@ function buildEmbeddingsUrl(provider, model, credentials, input) {
return `https://generativelanguage.googleapis.com/v1beta/${modelPath}:embedContent?key=${encodeURIComponent(apiKey)}`;
}
-switch (provider) {
-case "openai":
-return "https://api.openai.com/v1/embeddings";
-case "openrouter":
-return "https://openrouter.ai/api/v1/embeddings";
-default:
-// openai-compatible & custom-embedding providers: use their baseUrl + /embeddings
-if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) {
-const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
-// Defensive: strip trailing slash and accidental /embeddings to avoid double-append
-const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, "");
-return `${baseUrl}/embeddings`;
-}
-// For other providers, attempt to use their base URL pattern with /embeddings path
-return null;
+if (EMBEDDING_URLS[provider]) return EMBEDDING_URLS[provider];
+// openai-compatible & custom-embedding providers: use their baseUrl + /embeddings
+if (provider?.startsWith?.("openai-compatible-") || provider?.startsWith?.("custom-embedding-")) {
+const rawBaseUrl = credentials?.providerSpecificData?.baseUrl || "https://api.openai.com/v1";
+// Defensive: strip trailing slash and accidental /embeddings to avoid double-append
+const baseUrl = rawBaseUrl.replace(/\/$/, "").replace(/\/embeddings$/, "");
+return `${baseUrl}/embeddings`;
+}
+return null;
}
/**

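A quick sketch of how the refactored resolution order plays out (outputs follow from the code above; the "openai-compatible-acme" id and its baseUrl are made up):

buildEmbeddingsUrl("mistral", "mistral-embed", {});
// → "https://api.mistral.ai/v1/embeddings" (static EMBEDDING_URLS hit)

buildEmbeddingsUrl("openai-compatible-acme", "my-model", {
  providerSpecificData: { baseUrl: "https://llm.acme.dev/v1/" },
});
// → "https://llm.acme.dev/v1/embeddings" (trailing slash stripped, /embeddings appended once)

buildEmbeddingsUrl("cerebras", "my-model", {});
// → null (no known embeddings endpoint)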
View File

@@ -455,7 +455,209 @@ async function handleOpenAiTts({ model, input, credentials, responseFormat = "mp
return createTtsResponse(base64, "mp3", responseFormat);
}
// ── TTS Provider Registry (DRY) ────────────────────────────────
// ── Generic TTS Format Handlers (config-driven via ttsConfig.format) ──────
// Parse `model` string as "modelId/voiceId" or "modelId" (modelId may contain slashes — match against known list)
function parseModelVoice(model, defaultModel = "", defaultVoice = "", knownModels = []) {
if (!model) return { modelId: defaultModel, voiceId: defaultVoice };
// Find longest known model id that prefixes `model`
const known = knownModels.map((m) => m.id || m).filter(Boolean).sort((a, b) => b.length - a.length);
for (const id of known) {
if (model === id) return { modelId: id, voiceId: defaultVoice };
if (model.startsWith(`${id}/`)) return { modelId: id, voiceId: model.slice(id.length + 1) };
}
// Fallback: split on last "/" so "vendor/model/voice" → model="vendor/model", voice="voice"
const idx = model.lastIndexOf("/");
if (idx > 0) return { modelId: model.slice(0, idx), voiceId: model.slice(idx + 1) };
return { modelId: defaultModel || model, voiceId: defaultVoice || model };
}
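// Worked examples (illustrative inputs):
//   parseModelVoice("eleven_flash_v2_5/Rachel", "", "", [{ id: "eleven_flash_v2_5" }])
//     → { modelId: "eleven_flash_v2_5", voiceId: "Rachel" }  (known-id prefix match)
//   parseModelVoice("vendor/model/voice")
//     → { modelId: "vendor/model", voiceId: "voice" }  (last-"/" fallback)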
// Convert upstream Response (binary audio) to { base64, format }
async function responseToBase64(res, defaultFormat = "mp3") {
const buf = await res.arrayBuffer();
if (buf.byteLength < 100) throw new Error("Upstream returned empty audio");
const ctype = res.headers.get("content-type") || "";
let format = defaultFormat;
if (ctype.includes("wav")) format = "wav";
else if (ctype.includes("mpeg") || ctype.includes("mp3")) format = "mp3";
else if (ctype.includes("ogg")) format = "ogg";
return { base64: Buffer.from(buf).toString("base64"), format };
}
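// e.g. content-type "audio/ogg; codecs=opus" → format "ogg"; unrecognized types fall back to defaultFormat.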
async function throwUpstreamError(res) {
const text = await res.text().catch(() => "");
let msg = `Upstream error (${res.status})`;
try {
const parsed = JSON.parse(text);
msg = parsed?.error?.message || parsed?.message || parsed?.detail?.message || (typeof parsed?.detail === "string" ? parsed.detail : null) || text || msg;
} catch { msg = text || msg; }
throw new Error(msg);
}
// Hyperbolic: POST { text } → { audio: base64 }
async function ttsHyperbolic({ baseUrl, apiKey, text }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
body: JSON.stringify({ text }),
});
if (!res.ok) await throwUpstreamError(res);
const data = await res.json();
return { base64: data.audio, format: "mp3" };
}
// Deepgram: model via query, Token auth, returns binary
async function ttsDeepgram({ baseUrl, apiKey, text, modelId }) {
const url = new URL(baseUrl);
url.searchParams.set("model", modelId || "aura-asteria-en");
const res = await fetch(url.toString(), {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Token ${apiKey}` },
body: JSON.stringify({ text }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "mp3");
}
// Nvidia NIM: POST { input: { text }, voice, model } → binary
async function ttsNvidia({ baseUrl, apiKey, text, modelId, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
body: JSON.stringify({ input: { text }, voice: voiceId || "default", model: modelId }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "wav");
}
// HuggingFace: POST {baseUrl}/{modelId} { inputs: text } → binary
async function ttsHuggingFace({ baseUrl, apiKey, text, modelId }) {
if (!modelId || modelId.includes("..")) throw new Error("Invalid HuggingFace model ID");
const res = await fetch(`${baseUrl}/${modelId}`, {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
body: JSON.stringify({ inputs: text }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "wav");
}
// Inworld: POST { text, voiceId, modelId, audioConfig } → JSON { audioContent }
async function ttsInworld({ baseUrl, apiKey, text, modelId, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Basic ${apiKey}` },
body: JSON.stringify({
text,
voiceId: voiceId || "Alex",
modelId: modelId || "inworld-tts-1.5-mini",
audioConfig: { audioEncoding: "MP3" },
}),
});
if (!res.ok) await throwUpstreamError(res);
const data = await res.json();
if (!data.audioContent) throw new Error("Inworld TTS returned no audio");
return { base64: data.audioContent, format: "mp3" };
}
// Cartesia: POST { model_id, transcript, voice, output_format } → binary
async function ttsCartesia({ baseUrl, apiKey, text, modelId, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: {
"Content-Type": "application/json",
"X-API-Key": apiKey,
"Cartesia-Version": "2024-06-10",
},
body: JSON.stringify({
model_id: modelId || "sonic-2",
transcript: text,
...(voiceId ? { voice: { mode: "id", id: voiceId } } : {}),
output_format: { container: "mp3", bit_rate: 128000, sample_rate: 44100 },
}),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "mp3");
}
// PlayHT: token format "userId:apiKey", voice = s3 URL
async function ttsPlayHt({ baseUrl, apiKey, text, modelId, voiceId }) {
const [userId, key] = (apiKey || ":").split(":");
const res = await fetch(baseUrl, {
method: "POST",
headers: {
"Content-Type": "application/json",
"Accept": "audio/mpeg",
"X-USER-ID": userId || "",
"Authorization": `Bearer ${key || apiKey}`,
},
body: JSON.stringify({
text,
voice: voiceId || "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
voice_engine: modelId || "PlayDialog",
output_format: "mp3",
speed: 1,
}),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "mp3");
}
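// e.g. credentials.apiKey = "<userId>:<secretKey>"; if no ":" is present, the whole
// value becomes both X-USER-ID and the Bearer token (the `key || apiKey` fallback).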
// Coqui (local, noAuth): POST { text, speaker_id } → WAV
async function ttsCoqui({ baseUrl, text, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ text, ...(voiceId ? { speaker_id: voiceId } : {}) }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "wav");
}
// Tortoise (local, noAuth): POST { text, voice } → binary
async function ttsTortoise({ baseUrl, text, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ text, voice: voiceId || "random" }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "wav");
}
// OpenAI-compatible (qwen3-tts, openai-compat): POST { model, input, voice } → binary
async function ttsOpenAiCompat({ baseUrl, apiKey, text, modelId, voiceId }) {
const headers = { "Content-Type": "application/json" };
if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
const res = await fetch(baseUrl, {
method: "POST",
headers,
body: JSON.stringify({
model: modelId,
input: text,
voice: voiceId || "alloy",
response_format: "mp3",
speed: 1.0,
}),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "mp3");
}
// Format → handler dispatcher (DRY)
const FORMAT_HANDLERS = {
hyperbolic: ttsHyperbolic,
deepgram: ttsDeepgram,
"nvidia-tts": ttsNvidia,
"huggingface-tts": ttsHuggingFace,
inworld: ttsInworld,
cartesia: ttsCartesia,
playht: ttsPlayHt,
coqui: ttsCoqui,
tortoise: ttsTortoise,
openai: ttsOpenAiCompat,
};
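// e.g. any provider whose ttsConfig.format === "deepgram" dispatches through ttsDeepgram,
// so a new Deepgram-shaped provider needs only a config entry, not another handler.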
// ── TTS Provider Registry (legacy noAuth + special providers) ──────────
const TTS_PROVIDERS = {
"google-tts": {
synthesize: async (text, model) => {
@@ -480,15 +682,10 @@ const TTS_PROVIDERS = {
},
"elevenlabs": {
synthesize: async (text, model, credentials) => {
-if (!credentials?.apiKey) {
-throw new Error("ElevenLabs API key required");
-}
-// model format: "voice_id" or "model_id/voice_id"
+if (!credentials?.apiKey) throw new Error("ElevenLabs API key required");
let modelId = "eleven_flash_v2_5";
let voiceId = model;
-if (model && model.includes("/")) {
-[modelId, voiceId] = model.split("/");
-}
+if (model && model.includes("/")) [modelId, voiceId] = model.split("/");
const base64 = await elevenlabsTts(text, voiceId, credentials.apiKey, modelId);
return { base64, format: "mp3" };
},
@@ -508,15 +705,24 @@ const TTS_PROVIDERS = {
},
};
// ── Generic dispatcher: providers with ttsConfig.format ────────────────
// handleTtsCore consults TTS_PROVIDERS first; this is the ttsConfig.format fallback dispatch.
async function synthesizeViaConfig(provider, text, model, credentials) {
const { AI_PROVIDERS } = await import("@/shared/constants/providers");
const cfg = AI_PROVIDERS[provider]?.ttsConfig;
if (!cfg) return null;
const handler = FORMAT_HANDLERS[cfg.format];
if (!handler) return null;
const apiKey = credentials?.apiKey;
if (cfg.authType !== "none" && !apiKey) throw new Error(`${provider} API key required`);
const defaultModel = cfg.models?.[0]?.id || "";
const { modelId, voiceId } = parseModelVoice(model, defaultModel, "", cfg.models || []);
return handler({ baseUrl: cfg.baseUrl, apiKey, text, modelId, voiceId });
}
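// Illustrative ttsConfig shape consumed above (field names match this function; the values are invented):
// AI_PROVIDERS["some-provider"].ttsConfig = {
//   format: "cartesia",          // selects the handler from FORMAT_HANDLERS
//   baseUrl: "https://api.example.com/tts",
//   authType: "apiKey",          // anything other than "none" requires credentials.apiKey
//   models: [{ id: "sonic-2" }], // first entry doubles as the default model id
// };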
// ── Core handler ───────────────────────────────────────────────
/**
* Synthesize text to audio.
* @param {object} options
* @param {string} options.provider - "google-tts" | "edge-tts" | "local-device" | "openai"
* @param {string} options.model - voice/lang id
* @param {string} options.input - text to synthesize
* @param {object} [options.credentials] - required for openai
* @param {string} [options.responseFormat] - "mp3" (default) | "json" (base64)
* @returns {Promise<{success, response, status?, error?}>}
*/
export async function handleTtsCore({ provider, model, input, credentials, responseFormat = "mp3" }) {
@@ -525,18 +731,20 @@ export async function handleTtsCore({ provider, model, input, credentials, respo
}
const ttsProvider = TTS_PROVIDERS[provider];
-if (!ttsProvider) {
-return createErrorResult(HTTP_STATUS.BAD_REQUEST, `Provider '${provider}' does not support TTS via this route.`);
-}
try {
-const result = await ttsProvider.synthesize(input.trim(), model, credentials, responseFormat);
-// OpenAI returns full response object
-if (result.success !== undefined) return result;
-// Other providers return { base64, format }
-return createTtsResponse(result.base64, result.format, responseFormat);
+// Legacy/special providers (google-tts, edge-tts, local-device, elevenlabs, openai, openrouter)
+if (ttsProvider) {
+const result = await ttsProvider.synthesize(input.trim(), model, credentials, responseFormat);
+if (result.success !== undefined) return result;
+return createTtsResponse(result.base64, result.format, responseFormat);
+}
+// Generic config-driven dispatcher (hyperbolic, deepgram, nvidia, huggingface, inworld, cartesia, playht, coqui, tortoise, qwen, ...)
+const result = await synthesizeViaConfig(provider, input.trim(), model, credentials);
+if (result) return createTtsResponse(result.base64, result.format, responseFormat);
+return createErrorResult(HTTP_STATUS.BAD_REQUEST, `Provider '${provider}' does not support TTS via this route.`);
} catch (err) {
return createErrorResult(HTTP_STATUS.BAD_GATEWAY, err.message || "TTS synthesis failed");
}
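For reference, a minimal call into the refactored core handler, assuming the provider's ttsConfig is registered in AI_PROVIDERS (the provider, model, and key here are illustrative):

const result = await handleTtsCore({
  provider: "deepgram",
  model: "aura-asteria-en",
  input: "Hello from the config-driven dispatcher",
  credentials: { apiKey: process.env.DEEPGRAM_API_KEY },
  responseFormat: "json",
});
// Success flows through createTtsResponse with the handler's { base64, format };
// unknown providers and upstream failures come back via createErrorResult.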