Files
9router/open-sse/handlers/ttsProviders/genericFormats.js
2026-05-04 11:29:02 +07:00

168 lines
5.6 KiB
JavaScript

// Generic config-driven TTS handlers — dispatched by ttsConfig.format.
// Each handler accepts { baseUrl, apiKey, text, modelId, voiceId } and returns { base64, format }.
import { responseToBase64, throwUpstreamError } from "./_base.js";
// Hyperbolic: POST { text } → { audio: base64 }
async function hyperbolic({ baseUrl, apiKey, text }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
body: JSON.stringify({ text }),
});
if (!res.ok) await throwUpstreamError(res);
const data = await res.json();
return { base64: data.audio, format: "mp3" };
}
// Deepgram: model via query, Token auth, returns binary
async function deepgram({ baseUrl, apiKey, text, modelId }) {
const url = new URL(baseUrl);
url.searchParams.set("model", modelId || "aura-asteria-en");
const res = await fetch(url.toString(), {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Token ${apiKey}` },
body: JSON.stringify({ text }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "mp3");
}
// Nvidia NIM: POST { input: { text }, voice, model } → binary
async function nvidia({ baseUrl, apiKey, text, modelId, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
body: JSON.stringify({ input: { text }, voice: voiceId || "default", model: modelId }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "wav");
}
// HuggingFace: POST {baseUrl}/{modelId} { inputs: text } → binary
async function huggingface({ baseUrl, apiKey, text, modelId }) {
if (!modelId || modelId.includes("..")) throw new Error("Invalid HuggingFace model ID");
const res = await fetch(`${baseUrl}/${modelId}`, {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
body: JSON.stringify({ inputs: text }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "wav");
}
// Inworld: Basic auth, JSON { audioContent }
async function inworld({ baseUrl, apiKey, text, modelId, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json", "Authorization": `Basic ${apiKey}` },
body: JSON.stringify({
text,
voiceId: voiceId || "Alex",
modelId: modelId || "inworld-tts-1.5-mini",
audioConfig: { audioEncoding: "MP3" },
}),
});
if (!res.ok) await throwUpstreamError(res);
const data = await res.json();
if (!data.audioContent) throw new Error("Inworld TTS returned no audio");
return { base64: data.audioContent, format: "mp3" };
}
// Cartesia: X-API-Key header
async function cartesia({ baseUrl, apiKey, text, modelId, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: {
"Content-Type": "application/json",
"X-API-Key": apiKey,
"Cartesia-Version": "2024-06-10",
},
body: JSON.stringify({
model_id: modelId || "sonic-2",
transcript: text,
...(voiceId ? { voice: { mode: "id", id: voiceId } } : {}),
output_format: { container: "mp3", bit_rate: 128000, sample_rate: 44100 },
}),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "mp3");
}
// PlayHT: token format "userId:apiKey", voice = s3 URL
async function playht({ baseUrl, apiKey, text, modelId, voiceId }) {
const [userId, key] = (apiKey || ":").split(":");
const res = await fetch(baseUrl, {
method: "POST",
headers: {
"Content-Type": "application/json",
"Accept": "audio/mpeg",
"X-USER-ID": userId || "",
"Authorization": `Bearer ${key || apiKey}`,
},
body: JSON.stringify({
text,
voice: voiceId || "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
voice_engine: modelId || "PlayDialog",
output_format: "mp3",
speed: 1,
}),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "mp3");
}
// Coqui (local, noAuth): POST { text, speaker_id } → WAV
async function coqui({ baseUrl, text, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ text, ...(voiceId ? { speaker_id: voiceId } : {}) }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "wav");
}
// Tortoise (local, noAuth)
async function tortoise({ baseUrl, text, voiceId }) {
const res = await fetch(baseUrl, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ text, voice: voiceId || "random" }),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "wav");
}
// OpenAI-compatible upstream (qwen3-tts, etc.)
async function openaiCompat({ baseUrl, apiKey, text, modelId, voiceId }) {
const headers = { "Content-Type": "application/json" };
if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`;
const res = await fetch(baseUrl, {
method: "POST",
headers,
body: JSON.stringify({
model: modelId,
input: text,
voice: voiceId || "alloy",
response_format: "mp3",
speed: 1.0,
}),
});
if (!res.ok) await throwUpstreamError(res);
return responseToBase64(res, "mp3");
}
// format → handler dispatcher
export const FORMAT_HANDLERS = {
hyperbolic,
deepgram,
"nvidia-tts": nvidia,
"huggingface-tts": huggingface,
inworld,
cartesia,
playht,
coqui,
tortoise,
openai: openaiCompat,
};