mirror of
https://github.com/decolua/9router.git
synced 2026-05-08 12:01:28 +00:00
168 lines
5.6 KiB
JavaScript
168 lines
5.6 KiB
JavaScript
// Generic config-driven TTS handlers — dispatched by ttsConfig.format.
// Each handler accepts { baseUrl, apiKey, text, modelId, voiceId } and resolves to { base64, format }.
|
|
import { responseToBase64, throwUpstreamError } from "./_base.js";
|
|
|
|
// Hyperbolic: POST { text } → { audio: base64 }
|
|
/**
 * Hyperbolic TTS handler.
 *
 * POSTs `{ text }` as JSON to `baseUrl` with Bearer auth and reads the
 * base64 audio from the `audio` field of the JSON reply.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Endpoint the request is POSTed to.
 * @param {string} params.apiKey - Sent as `Authorization: Bearer <apiKey>`.
 * @param {string} params.text - Text to synthesize.
 * @returns {Promise<{ base64: string, format: string }>} The upstream `audio`
 *   value, labelled with format "mp3".
 * @throws {Error} When the reply carries no `audio` value. Non-OK responses are
 *   handed to throwUpstreamError (expected to throw; defined in ./_base.js).
 */
async function hyperbolic({ baseUrl, apiKey, text }) {
  const res = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) await throwUpstreamError(res);

  const data = await res.json();
  // Same guard the Inworld handler applies: fail loudly rather than hand the
  // caller { base64: undefined } when the upstream reply has no audio.
  if (!data || !data.audio) throw new Error("Hyperbolic TTS returned no audio");
  return { base64: data.audio, format: "mp3" };
}
|
|
|
|
// Deepgram: model via query, Token auth, returns binary
|
|
/**
 * Deepgram TTS handler.
 *
 * The model is passed as the `model` query parameter (falling back to
 * "aura-asteria-en"), auth uses the `Token` scheme, and the request body is
 * `{ text }`.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Absolute endpoint URL (parsed with `new URL`).
 * @param {string} params.apiKey - Sent as `Authorization: Token <apiKey>`.
 * @param {string} params.text - Text to synthesize.
 * @param {string} [params.modelId] - Value for the `model` query parameter.
 * @returns {Promise<*>} Result of responseToBase64(response, "mp3") —
 *   presumably { base64, format }; confirm in ./_base.js.
 */
async function deepgram({ baseUrl, apiKey, text, modelId }) {
  const endpoint = new URL(baseUrl);
  endpoint.searchParams.set("model", modelId || "aura-asteria-en");
  const target = endpoint.toString();

  const requestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Token ${apiKey}` },
    body: JSON.stringify({ text }),
  };

  const upstream = await fetch(target, requestInit);
  if (!upstream.ok) {
    await throwUpstreamError(upstream);
  }
  return responseToBase64(upstream, "mp3");
}
|
|
|
|
// Nvidia NIM: POST { input: { text }, voice, model } → binary
|
|
/**
 * Nvidia NIM TTS handler.
 *
 * POSTs `{ input: { text }, voice, model }` with Bearer auth. `voice` falls
 * back to "default" when no voiceId is supplied.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Endpoint the request is POSTed to.
 * @param {string} params.apiKey - Sent as `Authorization: Bearer <apiKey>`.
 * @param {string} params.text - Text to synthesize.
 * @param {string} [params.modelId] - Forwarded as `model`.
 * @param {string} [params.voiceId] - Forwarded as `voice`.
 * @returns {Promise<*>} Result of responseToBase64(response, "wav") —
 *   presumably { base64, format }; confirm in ./_base.js.
 */
async function nvidia({ baseUrl, apiKey, text, modelId, voiceId }) {
  const payload = {
    input: { text },
    voice: voiceId || "default",
    model: modelId,
  };
  const requestHeaders = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${apiKey}`,
  };

  const upstream = await fetch(baseUrl, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });
  if (!upstream.ok) {
    await throwUpstreamError(upstream);
  }
  return responseToBase64(upstream, "wav");
}
|
|
|
|
// HuggingFace: POST {baseUrl}/{modelId} { inputs: text } → binary
|
|
/**
 * HuggingFace TTS handler.
 *
 * POSTs `{ inputs: text }` with Bearer auth to `{baseUrl}/{modelId}`.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Base endpoint; trailing slashes are ignored.
 * @param {string} params.apiKey - Sent as `Authorization: Bearer <apiKey>`.
 * @param {string} params.text - Text to synthesize.
 * @param {string} params.modelId - Model path appended to `baseUrl`.
 * @returns {Promise<*>} Result of responseToBase64(response, "wav") —
 *   presumably { base64, format }; confirm in ./_base.js.
 * @throws {Error} "Invalid HuggingFace model ID" when `modelId` is missing, is
 *   not a string, contains "..", or contains `?`, `#`, a backslash or
 *   whitespace.
 */
async function huggingface({ baseUrl, apiKey, text, modelId }) {
  // modelId is spliced into the URL path, so beyond ".." also reject characters
  // that would end the path early ("?", "#") or be rewritten during URL parsing
  // (backslash, whitespace).
  const isValidModelId =
    typeof modelId === "string" &&
    modelId.length > 0 &&
    !modelId.includes("..") &&
    !/[?#\\\s]/.test(modelId);
  if (!isValidModelId) throw new Error("Invalid HuggingFace model ID");

  // Avoid "//" in the request URL when baseUrl is configured with a trailing slash.
  const root = String(baseUrl).replace(/\/+$/, "");

  const res = await fetch(`${root}/${modelId}`, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
    body: JSON.stringify({ inputs: text }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "wav");
}
|
|
|
|
// Inworld: Basic auth, JSON { audioContent }
|
|
/**
 * Inworld TTS handler.
 *
 * POSTs `{ text, voiceId, modelId, audioConfig }` using the `Basic` auth scheme
 * and reads the audio from the `audioContent` field of the JSON reply.
 * Defaults: voice "Alex", model "inworld-tts-1.5-mini", encoding "MP3".
 *
 * @param {object} params
 * @param {string} params.baseUrl - Endpoint the request is POSTed to.
 * @param {string} params.apiKey - Sent verbatim as `Authorization: Basic <apiKey>`.
 * @param {string} params.text - Text to synthesize.
 * @param {string} [params.modelId] - Forwarded as `modelId`.
 * @param {string} [params.voiceId] - Forwarded as `voiceId`.
 * @returns {Promise<{ base64: string, format: string }>} The upstream
 *   `audioContent` value, labelled with format "mp3".
 * @throws {Error} "Inworld TTS returned no audio" when `audioContent` is falsy.
 */
async function inworld({ baseUrl, apiKey, text, modelId, voiceId }) {
  const payload = {
    text,
    voiceId: voiceId || "Alex",
    modelId: modelId || "inworld-tts-1.5-mini",
    audioConfig: { audioEncoding: "MP3" },
  };

  const upstream = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json", "Authorization": `Basic ${apiKey}` },
    body: JSON.stringify(payload),
  });
  if (!upstream.ok) {
    await throwUpstreamError(upstream);
  }

  const reply = await upstream.json();
  if (reply.audioContent) {
    return { base64: reply.audioContent, format: "mp3" };
  }
  throw new Error("Inworld TTS returned no audio");
}
|
|
|
|
// Cartesia: X-API-Key header
|
|
/**
 * Cartesia TTS handler.
 *
 * Authenticates with an `X-API-Key` header and pins `Cartesia-Version` to
 * "2024-06-10". The body carries `model_id` (default "sonic-2"), `transcript`,
 * an optional `voice` selector, and a fixed MP3 `output_format`.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Endpoint the request is POSTed to.
 * @param {string} params.apiKey - Sent as the `X-API-Key` header.
 * @param {string} params.text - Forwarded as `transcript`.
 * @param {string} [params.modelId] - Forwarded as `model_id`.
 * @param {string} [params.voiceId] - When truthy, sent as `voice: { mode: "id", id }`.
 * @returns {Promise<*>} Result of responseToBase64(response, "mp3") —
 *   presumably { base64, format }; confirm in ./_base.js.
 */
async function cartesia({ baseUrl, apiKey, text, modelId, voiceId }) {
  // Keys are added in the same order the request has always used.
  const payload = {
    model_id: modelId || "sonic-2",
    transcript: text,
  };
  if (voiceId) {
    payload.voice = { mode: "id", id: voiceId };
  }
  payload.output_format = { container: "mp3", bit_rate: 128000, sample_rate: 44100 };

  const requestHeaders = {
    "Content-Type": "application/json",
    "X-API-Key": apiKey,
    "Cartesia-Version": "2024-06-10",
  };

  const upstream = await fetch(baseUrl, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });
  if (!upstream.ok) {
    await throwUpstreamError(upstream);
  }
  return responseToBase64(upstream, "mp3");
}
|
|
|
|
// PlayHT: token format "userId:apiKey", voice = s3 URL
|
|
/**
 * PlayHT TTS handler.
 *
 * `apiKey` is expected in the form "userId:secret". The part before the first
 * colon goes into `X-USER-ID`; everything after it is the Bearer token. When
 * there is no colon (or the secret part is empty) the whole `apiKey` is used as
 * the Bearer token.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Endpoint the request is POSTed to.
 * @param {string} params.apiKey - Credential string, "userId:secret".
 * @param {string} params.text - Text to synthesize.
 * @param {string} [params.modelId] - Forwarded as `voice_engine` (default "PlayDialog").
 * @param {string} [params.voiceId] - Forwarded as `voice`; defaults to a fixed s3 manifest URL.
 * @returns {Promise<*>} Result of responseToBase64(response, "mp3") —
 *   presumably { base64, format }; confirm in ./_base.js.
 */
async function playht({ baseUrl, apiKey, text, modelId, voiceId }) {
  // Split on the FIRST colon only, so a secret that itself contains ":" is not
  // truncated (a plain split(":") kept only the second segment).
  const credential = apiKey || "";
  const sep = credential.indexOf(":");
  const userId = sep === -1 ? credential : credential.slice(0, sep);
  const secret = sep === -1 ? "" : credential.slice(sep + 1);

  const res = await fetch(baseUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Accept": "audio/mpeg",
      "X-USER-ID": userId,
      // Fall back to the full credential when no secret part was supplied.
      "Authorization": `Bearer ${secret || credential}`,
    },
    body: JSON.stringify({
      text,
      voice: voiceId || "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
      voice_engine: modelId || "PlayDialog",
      output_format: "mp3",
      speed: 1,
    }),
  });
  if (!res.ok) await throwUpstreamError(res);
  return responseToBase64(res, "mp3");
}
|
|
|
|
// Coqui (local, noAuth): POST { text, speaker_id } → WAV
|
|
/**
 * Coqui TTS handler (no Authorization header is sent).
 *
 * POSTs `{ text }`, adding `speaker_id` only when a voiceId is supplied.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Endpoint the request is POSTed to.
 * @param {string} params.text - Text to synthesize.
 * @param {string} [params.voiceId] - When truthy, forwarded as `speaker_id`.
 * @returns {Promise<*>} Result of responseToBase64(response, "wav") —
 *   presumably { base64, format }; confirm in ./_base.js.
 */
async function coqui({ baseUrl, text, voiceId }) {
  const payload = { text };
  if (voiceId) {
    payload.speaker_id = voiceId;
  }

  const upstream = await fetch(baseUrl, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!upstream.ok) {
    await throwUpstreamError(upstream);
  }
  return responseToBase64(upstream, "wav");
}
|
|
|
|
// Tortoise (local, noAuth)
|
|
/**
 * Tortoise TTS handler (no Authorization header is sent).
 *
 * POSTs `{ text, voice }`, with `voice` falling back to "random".
 *
 * @param {object} params
 * @param {string} params.baseUrl - Endpoint the request is POSTed to.
 * @param {string} params.text - Text to synthesize.
 * @param {string} [params.voiceId] - Forwarded as `voice`.
 * @returns {Promise<*>} Result of responseToBase64(response, "wav") —
 *   presumably { base64, format }; confirm in ./_base.js.
 */
async function tortoise({ baseUrl, text, voiceId }) {
  const voice = voiceId || "random";
  const requestInit = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ text, voice }),
  };

  const upstream = await fetch(baseUrl, requestInit);
  if (!upstream.ok) {
    await throwUpstreamError(upstream);
  }
  return responseToBase64(upstream, "wav");
}
|
|
|
|
// OpenAI-compatible upstream (qwen3-tts, etc.)
|
|
/**
 * Handler for OpenAI-compatible speech endpoints.
 *
 * POSTs `{ model, input, voice, response_format, speed }`. The Authorization
 * header is attached only when an apiKey is provided. `voice` falls back to
 * "alloy"; `response_format` is fixed to "mp3" and `speed` to 1.0.
 *
 * @param {object} params
 * @param {string} params.baseUrl - Endpoint the request is POSTed to.
 * @param {string} [params.apiKey] - When truthy, sent as `Authorization: Bearer <apiKey>`.
 * @param {string} params.text - Forwarded as `input`.
 * @param {string} [params.modelId] - Forwarded as `model`.
 * @param {string} [params.voiceId] - Forwarded as `voice`.
 * @returns {Promise<*>} Result of responseToBase64(response, "mp3") —
 *   presumably { base64, format }; confirm in ./_base.js.
 */
async function openaiCompat({ baseUrl, apiKey, text, modelId, voiceId }) {
  const requestHeaders = {
    "Content-Type": "application/json",
    ...(apiKey ? { "Authorization": `Bearer ${apiKey}` } : {}),
  };
  const payload = {
    model: modelId,
    input: text,
    voice: voiceId || "alloy",
    response_format: "mp3",
    speed: 1.0,
  };

  const upstream = await fetch(baseUrl, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  });
  if (!upstream.ok) {
    await throwUpstreamError(upstream);
  }
  return responseToBase64(upstream, "mp3");
}
|
|
|
|
// format → handler dispatcher
|
|
/**
 * Lookup table from a format key to the handler function that serves it.
 * The key is not always the function name: "nvidia-tts", "huggingface-tts"
 * and "openai" map to nvidia, huggingface and openaiCompat respectively.
 */
export const FORMAT_HANDLERS = {
  "hyperbolic": hyperbolic,
  "deepgram": deepgram,
  "nvidia-tts": nvidia,
  "huggingface-tts": huggingface,
  "inworld": inworld,
  "cartesia": cartesia,
  "playht": playht,
  "coqui": coqui,
  "tortoise": tortoise,
  "openai": openaiCompat,
};
|