import type { DialogueTurn, ScriptSection, StructuredScript } from "../types";

/** A run of consecutive dialogue turns that is synthesized as one request. */
export interface AudioSegment {
  /** Turns in playback order. */
  turns: DialogueTurn[];
  /** Sum of `text.length` over `turns`. */
  characters: number;
  /** Distinct voices used in this segment (drives speech vs dialogue choice). */
  uniqueVoices: number;
}

/**
 * Map each script turn to a voice, dropping turns with empty text.
 *
 * @param script - Script whose sections are walked in order.
 * @param voiceMap - Lookup from a turn's `speakerKey` to a voice id.
 * @param fallbackVoiceId - Voice used when `voiceMap` has no entry for the speaker.
 * @returns Turns with trimmed text, in script order.
 */
export function flattenTurns(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string
): DialogueTurn[] {
  const turns: DialogueTurn[] = [];
  for (const section of script.sections) {
    for (const turn of section.turns) {
      const text = turn.text.trim();
      if (!text) continue; // whitespace-only turns produce no audio
      turns.push({ text, voiceId: voiceMap[turn.speakerKey] ?? fallbackVoiceId });
    }
  }
  return turns;
}

/**
 * Sentence tokenizer: optional body + one or more terminators + trailing
 * whitespace, or a trailing run with no terminator. Every character of the
 * input belongs to exactly one match, so nothing (e.g. a leading "...") is lost.
 */
const SENTENCE_PATTERN = /[^.!?]*[.!?]+\s*|[^.!?]+/g;

/** Locates the last whitespace character of a string. */
const LAST_WHITESPACE = /\s\S*$/;

/**
 * Break a single over-long run of text into pieces of at most `maxChars`
 * characters, preferring whitespace boundaries and only cutting inside a word
 * when the word itself exceeds the limit.
 */
function hardWrap(text: string, maxChars: number): string[] {
  const limit = Math.floor(maxChars);
  const pieces: string[] = [];
  let rest = text.trim();

  while (rest.length > limit) {
    // Look one character past the limit so that whitespace sitting exactly at
    // the limit still yields a full-width piece.
    let cut = rest.slice(0, limit + 1).search(LAST_WHITESPACE);
    if (cut <= 0) {
      // No usable whitespace: cut at the limit, but avoid separating a
      // surrogate pair when there is room to back off by one code unit.
      cut = limit;
      const before = rest.charCodeAt(cut - 1);
      const after = rest.charCodeAt(cut);
      const splitsPair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsPair && cut > 1) cut -= 1;
    }
    pieces.push(rest.slice(0, cut).trimEnd());
    rest = rest.slice(cut).trimStart();
  }

  if (rest) pieces.push(rest);
  return pieces;
}

/**
 * Split text longer than `maxChars` at sentence boundaries, hard-wrapping on
 * whitespace when a single sentence is itself too long.
 *
 * Text that already fits is returned unchanged as a single element. Otherwise
 * every returned piece is trimmed, non-empty and at most `maxChars` long.
 *
 * @param text - Text to split.
 * @param maxChars - Maximum length of each piece; must be at least 1.
 * @returns The pieces in their original order.
 * @throws RangeError if `maxChars` is less than 1 or NaN.
 */
export function splitLongText(text: string, maxChars: number): string[] {
  if (!(maxChars >= 1)) {
    throw new RangeError(`maxChars must be a number >= 1, got ${maxChars}`);
  }
  if (text.length <= maxChars) return [text];

  const sentences = text.match(SENTENCE_PATTERN) ?? [text];
  const parts: string[] = [];
  let current = "";

  const emit = (): void => {
    const piece = current.trim();
    if (piece) parts.push(piece);
    current = "";
  };

  for (const sentence of sentences) {
    // Lengths are measured after trimming because that is what gets emitted.
    if (sentence.trim().length > maxChars) {
      // A single very long sentence — hard-wrap on whitespace.
      emit();
      for (const piece of hardWrap(sentence, maxChars)) parts.push(piece);
      continue;
    }
    if ((current + sentence).trim().length > maxChars) emit();
    current += sentence;
  }
  emit();

  return parts;
}

/**
 * Group dialogue turns into segments each within `maxChars`. Turns longer than
 * the limit are split (preserving their voice). Each segment is later sent to
 * ElevenLabs as one request, then all segment MP3s are stitched together.
 *
 * @param turns - Turns in playback order.
 * @param maxChars - Character budget per segment; must be at least 1.
 * @returns Segments in playback order; empty when `turns` is empty.
 * @throws RangeError if `turns` is non-empty and `maxChars` is less than 1 or NaN.
 */
export function segmentTurns(turns: DialogueTurn[], maxChars: number): AudioSegment[] {
  // First expand any oversized turns into multiple sub-turns.
  const expanded: DialogueTurn[] = [];
  for (const turn of turns) {
    for (const piece of splitLongText(turn.text, maxChars)) {
      expanded.push({ text: piece, voiceId: turn.voiceId });
    }
  }

  const segments: AudioSegment[] = [];
  let bucket: DialogueTurn[] = [];
  let chars = 0;

  // Close the current bucket (if it holds anything) and start a fresh one.
  const flush = (): void => {
    if (bucket.length === 0) return;
    segments.push({
      turns: bucket,
      characters: chars,
      uniqueVoices: new Set(bucket.map((t) => t.voiceId)).size,
    });
    bucket = [];
    chars = 0;
  };

  for (const turn of expanded) {
    // Start a new segment when this turn would push the bucket over budget.
    if (chars + turn.text.length > maxChars && bucket.length > 0) flush();
    bucket.push(turn);
    chars += turn.text.length;
  }
  flush();

  return segments;
}

/**
 * Convenience: full script → audio segments.
 *
 * @param script - Script to convert.
 * @param voiceMap - Lookup from a turn's `speakerKey` to a voice id.
 * @param fallbackVoiceId - Voice used when `voiceMap` has no entry for the speaker.
 * @param maxChars - Character budget per segment; must be at least 1.
 */
export function segmentScript(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string,
  maxChars: number
): AudioSegment[] {
  return segmentTurns(flattenTurns(script, voiceMap, fallbackVoiceId), maxChars);
}

/**
 * Total characters across a script (for cost/limit estimation).
 *
 * Counts the raw `text.length` of every turn, so surrounding whitespace and
 * whitespace-only turns are included in the total.
 */
export function totalCharacters(sections: ScriptSection[]): number {
  return sections.reduce(
    (sum, s) => sum + s.turns.reduce((n, t) => n + t.text.length, 0),
    0
  );
}