51c541ad22
Cross-cutting input-validation, isolation, and DoS-resistance fixes across the app, API, billing, queue, and infra layers.
- Runtime validation (zod) for client-supplied admin actions (role/plan/limits), series generation index, and all pg-boss queue payloads
- Auth: require email verification before sign-in; reject weak/placeholder/short BETTER_AUTH_SECRET in production
- Billing: sanitize Stripe/PayPal errors (log server-side, generic to client); race-safe subscription upsert; only count "processed" webhook events as handled; verify org membership in getEffectivePlan to block plan escalation
- Series generation: reserve usage up front and refund on failure; bill the owning org, not the caller's active org
- Injection defenses: HTML-escape user fields in emails, strip CR/LF from subject/recipient, validate ElevenLabs voiceId before URL interpolation
- Media routes: stream off disk instead of buffering whole files; rate-limit anonymous public audio/cover endpoints by client IP
109 lines
3.4 KiB
TypeScript
109 lines
3.4 KiB
TypeScript
import type { AudioProvider, DialogueTurn, Voice } from "../types";
|
|
|
|
/** Base URL that every ElevenLabs request in this module is built from. */
const API = "https://api.elevenlabs.io/v1";

// Model IDs can be overridden through the environment. `process.env` values are
// either strings or `undefined`, so the destructuring defaults apply exactly
// when the variable is not set.
const {
  ELEVENLABS_TTS_MODEL: TTS_MODEL = "eleven_multilingual_v2",
  ELEVENLABS_DIALOGUE_MODEL: DIALOGUE_MODEL = "eleven_v3",
} = process.env;

/** Value sent as the `output_format` query parameter on synthesis requests. */
const OUTPUT_FORMAT = "mp3_44100_128";
|
|
|
|
/** ElevenLabs voice IDs are opaque alphanumeric tokens; reject anything else. */
const VOICE_ID_PATTERN = /^[A-Za-z0-9_-]+$/;

/** Longest slice of a rejected voice ID that is echoed back in the error message. */
const VOICE_ID_ERROR_PREVIEW_CHARS = 64;

/**
 * Read the ElevenLabs API key from the environment.
 *
 * @returns The value of `ELEVENLABS_API_KEY`.
 * @throws Error if the variable is unset or empty.
 */
function apiKey(): string {
  const k = process.env.ELEVENLABS_API_KEY;
  if (!k) throw new Error("ELEVENLABS_API_KEY is not set");
  return k;
}

/**
 * Validate a voice ID before it is interpolated into a request URL path.
 *
 * @param voiceId - Candidate voice ID; treated as untrusted input.
 * @returns The voice ID, URI-component-encoded.
 * @throws Error if `voiceId` is not a string or contains anything outside
 *   `[A-Za-z0-9_-]` (including the empty string).
 */
function safeVoiceId(voiceId: string): string {
  // `RegExp.prototype.test` stringifies its argument, so without this guard a
  // runtime `undefined` / `null` / `["abc"]` would be tested as "undefined" /
  // "null" / "abc", pass the pattern and end up in the URL.
  if (typeof voiceId !== "string") {
    throw new Error(`Invalid ElevenLabs voiceId: expected a string, got ${typeof voiceId}`);
  }
  if (!VOICE_ID_PATTERN.test(voiceId)) {
    // The rejected value is attacker-controlled: cap its length and drop
    // control characters / line separators before echoing it, so the message
    // cannot flood or forge log lines.
    const preview = voiceId
      .slice(0, VOICE_ID_ERROR_PREVIEW_CHARS)
      .replace(/[\u0000-\u001f\u007f\u2028\u2029]/g, "");
    const suffix = voiceId.length > VOICE_ID_ERROR_PREVIEW_CHARS ? "..." : "";
    throw new Error(`Invalid ElevenLabs voiceId: ${preview}${suffix}`);
  }
  // Every character the pattern allows is URL-safe already; encoding is kept
  // as a second line of defence should the pattern ever be relaxed.
  return encodeURIComponent(voiceId);
}
|
|
|
|
/**
 * One entry of the `voices` array in the ElevenLabs `/voices` response,
 * limited to the fields this module reads.
 */
interface ElevenVoice {
  /** Identifier of the voice. */
  voice_id: string;
  /** Display name of the voice. */
  name: string;
  /** URL of a preview clip, when the API supplies one. */
  preview_url?: string;
  /** Free-form string labels; this module reads `gender`, `accent` and `description`. */
  labels?: Record<string, string>;
}
|
|
|
|
/** Headers shared by the two synthesis endpoints: API key, JSON request body, MP3 response. */
function audioRequestHeaders(): Record<string, string> {
  return {
    "xi-api-key": apiKey(),
    "Content-Type": "application/json",
    accept: "audio/mpeg",
  };
}

/** Reject with `ElevenLabs <label> <status>: <body>` unless the response status is OK. */
async function throwUnlessOk(res: Response, label: string): Promise<void> {
  if (res.ok) return;
  throw new Error(`ElevenLabs ${label} ${res.status}: ${await safeText(res)}`);
}

/**
 * `AudioProvider` implementation that calls the ElevenLabs REST API with
 * `fetch`, authenticating each request with the `xi-api-key` header.
 */
export class ElevenLabsAudioProvider implements AudioProvider {
  // Kept safely under the ~2,000-char dialogue limit to leave headroom.
  readonly maxCharsPerRequest = 1800;

  /**
   * Synthesize `text` with a single voice through the text-to-speech endpoint.
   *
   * @param text - Text sent to the API unchanged.
   * @param voiceId - Voice to use; validated by `safeVoiceId` before it is placed in the URL path.
   * @param _opts - Not read by this implementation.
   * @returns The response body as a `Buffer`, plus `text.length` as the character count.
   * @throws Error if the voice ID is rejected, the API key is missing, or the response status is not OK.
   */
  async synthesizeSpeech(
    text: string,
    voiceId: string,
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    // The URL is built first so an invalid voice ID is reported before the API key is read.
    const url = `${API}/text-to-speech/${safeVoiceId(voiceId)}?output_format=${OUTPUT_FORMAT}`;
    const headers = audioRequestHeaders();
    const body = JSON.stringify({
      text,
      model_id: TTS_MODEL,
      voice_settings: { stability: 0.5, similarity_boost: 0.75 },
    });

    const res = await fetch(url, { method: "POST", headers, body });
    await throwUnlessOk(res, "TTS");

    const audio = Buffer.from(await res.arrayBuffer());
    return { audio, characters: text.length };
  }

  /**
   * Synthesize a multi-speaker exchange through the text-to-dialogue endpoint.
   *
   * @param turns - Dialogue turns; each is sent as `{ text, voice_id }`.
   * @param _opts - Not read by this implementation.
   * @returns The response body as a `Buffer`, plus the summed `text.length` of all turns.
   * @throws Error if the API key is missing or the response status is not OK.
   */
  async synthesizeDialogue(
    turns: DialogueTurn[],
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    const url = `${API}/text-to-dialogue?output_format=${OUTPUT_FORMAT}`;
    const headers = audioRequestHeaders();
    const inputs = turns.map((turn) => ({ text: turn.text, voice_id: turn.voiceId }));
    const body = JSON.stringify({ inputs, model_id: DIALOGUE_MODEL });

    const res = await fetch(url, { method: "POST", headers, body });
    await throwUnlessOk(res, "dialogue");

    let characters = 0;
    for (const turn of turns) characters += turn.text.length;

    const audio = Buffer.from(await res.arrayBuffer());
    return { audio, characters };
  }

  /**
   * Fetch the voices available to the configured API key and map them to `Voice` objects.
   *
   * @returns One `Voice` per entry of the response's `voices` array; empty when that field is absent.
   * @throws Error if the API key is missing or the response status is not OK.
   */
  async listVoices(): Promise<Voice[]> {
    const res = await fetch(`${API}/voices`, { headers: { "xi-api-key": apiKey() } });
    if (!res.ok) throw new Error(`ElevenLabs voices ${res.status}`);

    const payload = (await res.json()) as { voices?: ElevenVoice[] };
    const voices = payload.voices ?? [];
    return voices.map((voice) => {
      const labels = voice.labels;
      return {
        id: voice.voice_id,
        name: voice.name,
        gender: normalizeGender(labels?.gender),
        accent: labels?.accent,
        description: labels?.description,
        previewUrl: voice.preview_url,
      };
    });
  }
}
|
|
|
|
/**
 * Map a free-form gender label onto the values `Voice["gender"]` accepts.
 *
 * @param g - Label value, if present.
 * @returns `g` itself when it is exactly `"male"` or `"female"`; `"neutral"`
 *   for anything else, including a missing label.
 */
function normalizeGender(g?: string): Voice["gender"] {
  switch (g) {
    case "male":
    case "female":
      return g;
    default:
      return "neutral";
  }
}
|
|
|
|
/** Upper bound on how much of an upstream error body is copied into an error message. */
const MAX_ERROR_BODY_CHARS = 500;

/**
 * Best-effort description of a failed response for use in an error message.
 *
 * @param res - Response whose body should be read.
 * @returns The trimmed response body, cut to `MAX_ERROR_BODY_CHARS` characters
 *   with a `... [truncated]` marker when longer; `res.statusText` when the body
 *   is blank or cannot be read. Never rejects.
 */
async function safeText(res: Response): Promise<string> {
  let body: string;
  try {
    body = await res.text();
  } catch {
    // Reading can fail (e.g. the body was already consumed); the status text
    // is the only description left.
    return res.statusText;
  }

  const trimmed = body.trim();
  // A blank body would leave callers with a message ending in ": ".
  if (trimmed === "") return res.statusText;

  // Upstream error pages can be arbitrarily large; keep error messages bounded.
  if (trimmed.length > MAX_ERROR_BODY_CHARS) {
    return `${trimmed.slice(0, MAX_ERROR_BODY_CHARS)}... [truncated]`;
  }
  return trimmed;
}
|