import type { AudioProvider, DialogueTurn, Voice } from "../types";

/** Base URL shared by every ElevenLabs endpoint this module calls. */
const API = "https://api.elevenlabs.io/v1";
/** Model id sent with single-voice requests; overridable via ELEVENLABS_TTS_MODEL. */
const TTS_MODEL = process.env.ELEVENLABS_TTS_MODEL ?? "eleven_multilingual_v2";
/** Model id sent with dialogue requests; overridable via ELEVENLABS_DIALOGUE_MODEL. */
const DIALOGUE_MODEL = process.env.ELEVENLABS_DIALOGUE_MODEL ?? "eleven_v3";
/** Value of the `output_format` query parameter on every synthesis request. */
const OUTPUT_FORMAT = "mp3_44100_128";

/**
 * Reads the API key from the environment on every call (no caching).
 *
 * @returns The value of `ELEVENLABS_API_KEY`.
 * @throws Error if the variable is unset or empty.
 */
function apiKey(): string {
  const k = process.env.ELEVENLABS_API_KEY;
  if (!k) throw new Error("ELEVENLABS_API_KEY is not set");
  return k;
}

/** The subset of a `/voices` response entry that this module reads. */
interface ElevenVoice {
  voice_id: string;
  name: string;
  preview_url?: string;
  labels?: Record<string, string>;
}

/**
 * POSTs a JSON payload to a synthesis endpoint and returns the response bytes.
 *
 * @param path    Endpoint path relative to `API`, without leading slash or query string.
 * @param payload Request body; serialized with `JSON.stringify`.
 * @param label   Short name interpolated into the error message on failure.
 * @returns The raw response body as a Buffer.
 * @throws Error `ElevenLabs <label> <status>: <body>` when the response is not ok.
 */
async function postForAudio(
  path: string,
  payload: unknown,
  label: string
): Promise<Buffer> {
  const res = await fetch(`${API}/${path}?output_format=${OUTPUT_FORMAT}`, {
    method: "POST",
    headers: {
      "xi-api-key": apiKey(),
      "Content-Type": "application/json",
      accept: "audio/mpeg",
    },
    body: JSON.stringify(payload),
  });
  if (!res.ok) {
    throw new Error(`ElevenLabs ${label} ${res.status}: ${await safeText(res)}`);
  }
  return Buffer.from(await res.arrayBuffer());
}

/** `AudioProvider` implementation backed by the ElevenLabs HTTP API. */
export class ElevenLabsAudioProvider implements AudioProvider {
  // Kept safely under the ~2,000-char dialogue limit to leave headroom.
  readonly maxCharsPerRequest = 1800;

  /**
   * Synthesizes `text` with a single voice.
   *
   * @param text    Text to speak.
   * @param voiceId Voice identifier; percent-encoded before being placed in the URL path.
   * @param _opts   Accepted for interface compatibility; not used by this implementation.
   * @returns The audio bytes and `text.length` as the character count.
   * @throws Error if the API key is missing or the response is not ok.
   */
  async synthesizeSpeech(
    text: string,
    voiceId: string,
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    const audio = await postForAudio(
      // Encode so an id containing "/", "?" or "#" cannot alter the request path.
      `text-to-speech/${encodeURIComponent(voiceId)}`,
      {
        text,
        model_id: TTS_MODEL,
        voice_settings: { stability: 0.5, similarity_boost: 0.75 },
      },
      "TTS"
    );
    return { audio, characters: text.length };
  }

  /**
   * Synthesizes a multi-speaker dialogue in one request.
   *
   * @param turns Ordered turns; each contributes its `text` and `voiceId`.
   * @param _opts Accepted for interface compatibility; not used by this implementation.
   * @returns The audio bytes and the summed length of all turn texts.
   * @throws Error if the API key is missing or the response is not ok.
   */
  async synthesizeDialogue(
    turns: DialogueTurn[],
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    const audio = await postForAudio(
      "text-to-dialogue",
      {
        inputs: turns.map((t) => ({ text: t.text, voice_id: t.voiceId })),
        model_id: DIALOGUE_MODEL,
      },
      "dialogue"
    );
    const characters = turns.reduce((n, t) => n + t.text.length, 0);
    return { audio, characters };
  }

  /**
   * Fetches the voice list and maps each entry to the project's `Voice` shape.
   *
   * @returns One `Voice` per entry in the response's `voices` array, or an empty
   *          array when that field is absent.
   * @throws Error `ElevenLabs voices <status>: <body>` when the response is not ok.
   */
  async listVoices(): Promise<Voice[]> {
    const res = await fetch(`${API}/voices`, { headers: { "xi-api-key": apiKey() } });
    if (!res.ok) {
      // Include the response body, matching the synthesis error messages.
      throw new Error(`ElevenLabs voices ${res.status}: ${await safeText(res)}`);
    }
    const data = (await res.json()) as { voices?: ElevenVoice[] };
    return (data.voices ?? []).map((v) => ({
      id: v.voice_id,
      name: v.name,
      gender: normalizeGender(v.labels?.gender),
      accent: v.labels?.accent,
      description: v.labels?.description,
      previewUrl: v.preview_url,
    }));
  }
}

/**
 * Maps a free-form gender label to the `Voice["gender"]` union.
 *
 * @param g Label value, if any.
 * @returns `g` when it is exactly "male" or "female"; otherwise "neutral".
 */
function normalizeGender(g?: string): Voice["gender"] {
  if (g === "male" || g === "female") return g;
  return "neutral";
}

/**
 * Best-effort read of a response body for error messages.
 *
 * @param res The response to read.
 * @returns The body text, or `res.statusText` if reading the body throws.
 */
async function safeText(res: Response): Promise<string> {
  try {
    return await res.text();
  } catch {
    return res.statusText;
  }
}