// lib/ai/providers/elevenlabs-audio.ts

import type { AudioProvider, DialogueTurn, Voice } from "../types";

/** Base URL that every ElevenLabs request in this module is built from. */
const API = "https://api.elevenlabs.io/v1";

/** Value sent as the `output_format` query parameter on synthesis requests. */
const OUTPUT_FORMAT = "mp3_44100_128";

// Model ids may be overridden through the environment. `??` only falls back
// when the variable is undefined, so an empty-string override is used as-is.
/** Model id sent with single-voice text-to-speech requests. */
const TTS_MODEL = process.env["ELEVENLABS_TTS_MODEL"] ?? "eleven_multilingual_v2";
/** Model id sent with multi-voice dialogue requests. */
const DIALOGUE_MODEL = process.env["ELEVENLABS_DIALOGUE_MODEL"] ?? "eleven_v3";

/**
 * Reads the ElevenLabs API key from the environment on every call.
 *
 * @returns The value of `ELEVENLABS_API_KEY`.
 * @throws Error when the variable is undefined or an empty string.
 */
function apiKey(): string {
  const { ELEVENLABS_API_KEY: key } = process.env;
  if (key) return key;
  throw new Error("ELEVENLABS_API_KEY is not set");
}

/**
 * Subset of a voice entry, as read from the `voices` array of the
 * `GET /voices` response. Only the fields this module consumes are declared.
 */
interface ElevenVoice {
  /** Display name of the voice. */
  name: string;
  /** Identifier of the voice; surfaced to callers as `Voice.id`. */
  voice_id: string;
  /** Free-form string labels; this module reads `gender`, `accent` and `description`. */
  labels?: { [label: string]: string };
  /** Preview URL, when the API supplies one. */
  preview_url?: string;
}

/**
 * `AudioProvider` implementation that talks to the ElevenLabs REST API.
 *
 * Every request is authenticated with the `xi-api-key` header, whose value is
 * read from the environment at call time via `apiKey()`.
 */
export class ElevenLabsAudioProvider implements AudioProvider {
  // Kept safely under the ~2,000-char dialogue limit to leave headroom.
  readonly maxCharsPerRequest = 1800;

  /**
   * Synthesizes `text` with a single voice via `POST /text-to-speech/{voiceId}`.
   *
   * @param text - Text to speak; sent to the API unchanged.
   * @param voiceId - Voice identifier. It is percent-encoded before being
   *   placed in the URL path so characters such as `/`, `?` or `#` cannot
   *   alter the route or the query string.
   * @param _opts - Currently unused by this provider.
   * @returns The response body as a `Buffer`, plus `text.length` (UTF-16 code
   *   units) as the character count.
   * @throws Error when the API key is missing or the response status is not OK.
   */
  async synthesizeSpeech(
    text: string,
    voiceId: string,
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    const audio = await requestAudio(
      `text-to-speech/${encodeURIComponent(voiceId)}`,
      {
        text,
        model_id: TTS_MODEL,
        voice_settings: { stability: 0.5, similarity_boost: 0.75 },
      },
      "TTS"
    );
    return { audio, characters: text.length };
  }

  /**
   * Synthesizes a multi-voice dialogue via `POST /text-to-dialogue`.
   *
   * @param turns - Ordered turns; each is sent as `{ text, voice_id }`.
   * @param _opts - Currently unused by this provider.
   * @returns The response body as a `Buffer`, plus the summed `text.length`
   *   of all turns as the character count.
   * @throws Error when the API key is missing or the response status is not OK.
   */
  async synthesizeDialogue(
    turns: DialogueTurn[],
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    const audio = await requestAudio(
      "text-to-dialogue",
      {
        inputs: turns.map((t) => ({ text: t.text, voice_id: t.voiceId })),
        model_id: DIALOGUE_MODEL,
      },
      "dialogue"
    );
    const characters = turns.reduce((n, t) => n + t.text.length, 0);
    return { audio, characters };
  }

  /**
   * Fetches the voices visible to the configured API key via `GET /voices`.
   *
   * @returns One `Voice` per entry in the response's `voices` array, or an
   *   empty array when that field is absent.
   * @throws Error when the API key is missing or the response status is not OK.
   */
  async listVoices(): Promise<Voice[]> {
    const res = await fetch(`${API}/voices`, { headers: { "xi-api-key": apiKey() } });
    // Include the response body, like the synthesis methods do, so failures
    // carry the API's own explanation and not just a status code.
    if (!res.ok) throw new Error(`ElevenLabs voices ${res.status}: ${await safeText(res)}`);
    const data = (await res.json()) as { voices?: ElevenVoice[] };
    return (data.voices ?? []).map((v) => ({
      id: v.voice_id,
      name: v.name,
      gender: normalizeGender(v.labels?.gender),
      accent: v.labels?.accent,
      description: v.labels?.description,
      previewUrl: v.preview_url,
    }));
  }
}

/**
 * POSTs a JSON payload to an audio-producing endpoint and returns the bytes.
 *
 * @param path - Endpoint path relative to `API`, without a leading slash and
 *   already URL-safe.
 * @param payload - Value serialized with `JSON.stringify` as the request body.
 * @param label - Short operation name embedded in the error message.
 * @returns The full response body as a `Buffer`.
 * @throws Error when the API key is missing or the response status is not OK.
 */
async function requestAudio(path: string, payload: unknown, label: string): Promise<Buffer> {
  const res = await fetch(`${API}/${path}?output_format=${OUTPUT_FORMAT}`, {
    method: "POST",
    headers: {
      "xi-api-key": apiKey(),
      "Content-Type": "application/json",
      accept: "audio/mpeg",
    },
    body: JSON.stringify(payload),
  });
  if (!res.ok) throw new Error(`ElevenLabs ${label} ${res.status}: ${await safeText(res)}`);
  return Buffer.from(await res.arrayBuffer());
}

/**
 * Maps a raw gender label onto the `Voice["gender"]` values.
 *
 * @param g - Label value, if any. Matching is exact and case-sensitive.
 * @returns `"male"` or `"female"` when `g` is exactly that string;
 *   `"neutral"` for anything else, including `undefined`.
 */
function normalizeGender(g?: string): Voice["gender"] {
  switch (g) {
    case "male":
      return "male";
    case "female":
      return "female";
    default:
      return "neutral";
  }
}

/**
 * Best-effort read of a response body for use in error messages.
 *
 * @param res - Response whose body should be read.
 * @returns The body text, or `res.statusText` if reading the body throws or rejects.
 */
async function safeText(res: Response): Promise<string> {
  let detail: string;
  try {
    detail = await res.text();
  } catch {
    // Reading the body failed; fall back to the status line text.
    detail = res.statusText;
  }
  return detail;
}
Initial commit: PodcastYes — AI podcast platform 2026-06-07 03:58:32 -04:00			`import type { AudioProvider, DialogueTurn, Voice } from "../types";`

			`const API = "https://api.elevenlabs.io/v1";`
			`const TTS_MODEL = process.env.ELEVENLABS_TTS_MODEL ?? "eleven_multilingual_v2";`
			`const DIALOGUE_MODEL = process.env.ELEVENLABS_DIALOGUE_MODEL ?? "eleven_v3";`
			`const OUTPUT_FORMAT = "mp3_44100_128";`

			`function apiKey(): string {`
			`const k = process.env.ELEVENLABS_API_KEY;`
			`if (!k) throw new Error("ELEVENLABS_API_KEY is not set");`
			`return k;`
			`}`

			`interface ElevenVoice {`
			`voice_id: string;`
			`name: string;`
			`preview_url?: string;`
			`labels?: Record<string, string>;`
			`}`

			`export class ElevenLabsAudioProvider implements AudioProvider {`
			`// Kept safely under the ~2,000-char dialogue limit to leave headroom.`
			`readonly maxCharsPerRequest = 1800;`

			`async synthesizeSpeech(`
			`text: string,`
			`voiceId: string,`
			`_opts?: { language?: string }`
			`): Promise<{ audio: Buffer; characters: number }> {`
			`const res = await fetch(`
			`${API}/text-to-speech/${voiceId}?output_format=${OUTPUT_FORMAT}`,
			`{`
			`method: "POST",`
			`headers: {`
			`"xi-api-key": apiKey(),`
			`"Content-Type": "application/json",`
			`accept: "audio/mpeg",`
			`},`
			`body: JSON.stringify({`
			`text,`
			`model_id: TTS_MODEL,`
			`voice_settings: { stability: 0.5, similarity_boost: 0.75 },`
			`}),`
			`}`
			`);`
			if (!res.ok) throw new Error(`ElevenLabs TTS ${res.status}: ${await safeText(res)}`);
			`return { audio: Buffer.from(await res.arrayBuffer()), characters: text.length };`
			`}`

			`async synthesizeDialogue(`
			`turns: DialogueTurn[],`
			`_opts?: { language?: string }`
			`): Promise<{ audio: Buffer; characters: number }> {`
			const res = await fetch(`${API}/text-to-dialogue?output_format=${OUTPUT_FORMAT}`, {
			`method: "POST",`
			`headers: {`
			`"xi-api-key": apiKey(),`
			`"Content-Type": "application/json",`
			`accept: "audio/mpeg",`
			`},`
			`body: JSON.stringify({`
			`inputs: turns.map((t) => ({ text: t.text, voice_id: t.voiceId })),`
			`model_id: DIALOGUE_MODEL,`
			`}),`
			`});`
			if (!res.ok) throw new Error(`ElevenLabs dialogue ${res.status}: ${await safeText(res)}`);
			`const characters = turns.reduce((n, t) => n + t.text.length, 0);`
			`return { audio: Buffer.from(await res.arrayBuffer()), characters };`
			`}`

			`async listVoices(): Promise<Voice[]> {`
			const res = await fetch(`${API}/voices`, { headers: { "xi-api-key": apiKey() } });
			if (!res.ok) throw new Error(`ElevenLabs voices ${res.status}`);
			`const data = (await res.json()) as { voices?: ElevenVoice[] };`
			`return (data.voices ?? []).map((v) => ({`
			`id: v.voice_id,`
			`name: v.name,`
			`gender: normalizeGender(v.labels?.gender),`
			`accent: v.labels?.accent,`
			`description: v.labels?.description,`
			`previewUrl: v.preview_url,`
			`}));`
			`}`
			`}`

			`function normalizeGender(g?: string): Voice["gender"] {`
			`if (g === "male" \|\| g === "female") return g;`
			`return "neutral";`
			`}`

			`async function safeText(res: Response): Promise<string> {`
			`try {`
			`return await res.text();`
			`} catch {`
			`return res.statusText;`
			`}`
			`}`