Files
9router/open-sse/handlers/ttsProviders/index.js
decolua d4bc42e1f5 feat: add STT support, Gemini TTS, and expand usage tracking
- Speech-to-Text: full pipeline with sttCore handler, /v1/audio/transcriptions
  endpoint, sttConfig for OpenAI, Gemini, Groq, Deepgram, AssemblyAI,
  HuggingFace, NVIDIA Parakeet; new 9router-stt skill
- Gemini TTS: add gemini provider with 30 prebuilt voices and TTS_PROVIDER_CONFIG
- Usage: implement GLM (intl/cn) and MiniMax (intl/cn) quota fetchers; refactor
  Gemini CLI usage to use retrieveUserQuota with per-model buckets
- Disabled models: lowdb-backed disabledModelsDb + /api/models/disabled route
- Header search: reusable Zustand store (headerSearchStore) wired into Header
- CLI tools: add Claude Cowork tool card and cowork-settings API
- Providers: introduce mediaPriority sorting in getProvidersByKind, add
  Kimi K2.6, reorder hermes, drop qwen STT kind
- UI: expand media-providers/[kind]/[id] page (+314), enhance OAuthModal,
  ModelSelectModal, ProviderTopology, ProxyPools, ProviderLimits
- Assets: refresh provider PNGs (alicode, byteplus, cloudflare-ai, nvidia,
  ollama, vertex, volcengine-ark) and add aws-polly, fal-ai, jina-ai, recraft,
  runwayml, stability-ai, topaz, black-forest-labs
2026-05-05 10:32:59 +07:00

51 lines
1.8 KiB
JavaScript

// TTS provider registry
import googleTts from "./googleTts.js";
import edgeTts, { fetchEdgeTtsVoices } from "./edgeTts.js";
import localDevice, { fetchLocalDeviceVoices } from "./localDevice.js";
import elevenlabs, { fetchElevenLabsVoices } from "./elevenlabs.js";
import openai from "./openai.js";
import openrouter from "./openrouter.js";
import gemini, { fetchGeminiVoices } from "./gemini.js";
import { FORMAT_HANDLERS } from "./genericFormats.js";
import { parseModelVoice } from "./_base.js";
// Providers whose adapter modules implement their own synthesize() logic,
// as opposed to the generic config-driven format handlers below.
const SPECIAL_ADAPTERS = {
  "google-tts": googleTts,
  "edge-tts": edgeTts,
  "local-device": localDevice,
  "elevenlabs": elevenlabs,
  "openai": openai,
  "openrouter": openrouter,
  "gemini": gemini,
};
/**
 * Look up a provider's custom TTS adapter.
 * @param {string} provider - Provider key (e.g. "edge-tts", "gemini").
 * @returns {object|null} The adapter module, or null when the provider has
 *   no special adapter and should go through the generic config path.
 */
export function getTtsAdapter(provider) {
  const adapter = SPECIAL_ADAPTERS[provider];
  return adapter === undefined ? null : adapter;
}
/**
 * Generic, config-driven TTS dispatch: resolves the provider's `ttsConfig`
 * from AI_PROVIDERS and hands synthesis off to the matching format handler.
 * @param {string} provider - Provider key in AI_PROVIDERS.
 * @param {string} text - Text to synthesize.
 * @param {string} model - Combined model/voice identifier (split by parseModelVoice).
 * @param {{apiKey?: string}} [credentials] - Caller-supplied credentials.
 * @returns {Promise<*|null>} The format handler's result, or null when the
 *   provider has no ttsConfig or its format has no registered handler.
 * @throws {Error} When the config requires auth and no API key was supplied.
 */
export async function synthesizeViaConfig(provider, text, model, credentials) {
  // Loaded lazily — NOTE(review): presumably avoids a static import cycle
  // with the shared constants module; confirm before making it a top import.
  const { AI_PROVIDERS } = await import("@/shared/constants/providers");

  const cfg = AI_PROVIDERS[provider]?.ttsConfig;
  if (!cfg) return null;

  const handler = FORMAT_HANDLERS[cfg.format];
  if (!handler) return null;

  // Providers configured with authType "none" may be called without a key.
  const apiKey = credentials?.apiKey;
  if (cfg.authType !== "none" && !apiKey) {
    throw new Error(`${provider} API key required`);
  }

  const fallbackModel = cfg.models?.[0]?.id || "";
  const { modelId, voiceId } = parseModelVoice(model, fallbackModel, "", cfg.models || []);
  return handler({ baseUrl: cfg.baseUrl, apiKey, text, modelId, voiceId });
}
// Provider key -> voice-list fetcher, consumed by the
// /api/media-providers/tts/voices route. Only providers with a dynamic
// voice catalog appear here.
export const VOICE_FETCHERS = {
  "edge-tts": fetchEdgeTtsVoices,
  "local-device": fetchLocalDeviceVoices,
  "elevenlabs": fetchElevenLabsVoices,
  "gemini": fetchGeminiVoices,
};
// Re-export the fetchers individually for backward compat (callers may
// import these names directly rather than via VOICE_FETCHERS).
export { fetchEdgeTtsVoices, fetchLocalDeviceVoices, fetchElevenLabsVoices, fetchGeminiVoices };