111 lines
3.3 KiB
TypeScript
111 lines
3.3 KiB
TypeScript
import type { DialogueTurn, ScriptSection, StructuredScript } from "../types";
|
|
|
|
/**
 * A batch of dialogue turns grouped together so that it can be handled as a
 * single unit (see `segmentTurns`, which builds these).
 */
export interface AudioSegment {
  /** The turns that make up this segment, in their original order. */
  turns: DialogueTurn[];

  /** Combined `text.length` of every turn in `turns`. */
  characters: number;

  /** Distinct voices used in this segment (drives speech vs dialogue choice). */
  uniqueVoices: number;
}
|
|
|
|
/**
 * Flatten a structured script into one ordered list of dialogue turns.
 *
 * Each turn's text is trimmed, and turns that are empty after trimming are
 * dropped. The voice is looked up by the turn's `speakerKey` in `voiceMap`;
 * speakers without an entry (or with a nullish one) get `fallbackVoiceId`.
 *
 * @param script - Script whose sections and turns are walked in order.
 * @param voiceMap - Speaker key → voice ID. Only the map's own properties count.
 * @param fallbackVoiceId - Voice used when a speaker has no entry in `voiceMap`.
 * @returns The non-empty turns in script order, each with its resolved voice.
 */
export function flattenTurns(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string
): DialogueTurn[] {
  const turns: DialogueTurn[] = [];
  for (const section of script.sections) {
    for (const turn of section.turns) {
      const text = turn.text.trim();
      if (!text) continue;

      // Own-property check: on a plain object, keys such as "constructor" or
      // "toString" would otherwise resolve to inherited Object.prototype
      // members (functions) instead of falling back to the default voice.
      const mapped = Object.prototype.hasOwnProperty.call(voiceMap, turn.speakerKey)
        ? voiceMap[turn.speakerKey]
        : undefined;

      turns.push({ text, voiceId: mapped ?? fallbackVoiceId });
    }
  }
  return turns;
}
|
|
|
|
/**
 * Break one over-long run of text into pieces of at most `limit` characters.
 * Each cut is made at the last whitespace inside the current window when there
 * is one; otherwise the text is cut hard at `limit` (stepping back one code
 * unit if that would separate a surrogate pair).
 */
function hardWrapAtWhitespace(text: string, limit: number): string[] {
  const whitespace = /\s/;
  const pieces: string[] = [];
  let rest = text.trim();

  while (rest.length > limit) {
    // Look for the right-most whitespace at index 1..limit; cutting there
    // yields a piece of at most `limit` characters.
    let cut = -1;
    for (let i = limit; i > 0; i--) {
      if (whitespace.test(rest.charAt(i))) {
        cut = i;
        break;
      }
    }

    if (cut === -1) {
      // No whitespace in the window: hard cut, but keep surrogate pairs whole.
      cut = limit;
      const prev = rest.charCodeAt(cut - 1);
      if (cut > 1 && prev >= 0xd800 && prev <= 0xdbff) cut -= 1;
    }

    pieces.push(rest.slice(0, cut).trimEnd());
    rest = rest.slice(cut).trimStart();
  }

  if (rest) pieces.push(rest);
  return pieces;
}

/**
 * Split text longer than `maxChars` into pieces that each fit the limit.
 *
 * Text that already fits is returned as-is (untrimmed) in a one-element array.
 * Otherwise the text is cut into sentences (ending in `.`, `!` or `?`), and
 * consecutive sentences are packed together while they fit. A single sentence
 * longer than the limit is wrapped on whitespace, with a hard cut only when a
 * word itself exceeds the limit. Returned pieces are trimmed and never empty.
 *
 * @param text - Text to split.
 * @param maxChars - Maximum length (in UTF-16 code units) of each piece.
 * @returns The pieces in original order.
 * @throws RangeError if the text needs splitting but `maxChars` is not positive.
 */
export function splitLongText(text: string, maxChars: number): string[] {
  if (text.length <= maxChars) return [text];

  // A non-positive limit can never be satisfied; the old slicing loop would
  // spin forever here, so fail loudly instead.
  if (maxChars <= 0) {
    throw new RangeError(`maxChars must be positive, got ${maxChars}`);
  }
  const wrapLimit = Math.max(1, Math.floor(maxChars));

  // First alternative: optional text + terminator run + trailing whitespace.
  // Second alternative: trailing text with no terminator. Together they cover
  // every character, so punctuation-only runs such as a leading "..." are kept.
  const sentences = text.match(/[^.!?]*[.!?]+\s*|[^.!?]+/g) ?? [text];

  const parts: string[] = [];
  let current = "";

  for (const sentence of sentences) {
    // Lengths are measured after trimming because pieces are trimmed on output.
    if (sentence.trim().length > maxChars) {
      // A single very long sentence: flush what we have, then wrap it.
      if (current.trim()) parts.push(current.trim());
      current = "";
      for (const piece of hardWrapAtWhitespace(sentence, wrapLimit)) {
        parts.push(piece);
      }
      continue;
    }

    if ((current + sentence).trim().length > maxChars) {
      if (current.trim()) parts.push(current.trim());
      current = sentence;
    } else {
      current += sentence;
    }
  }

  if (current.trim()) parts.push(current.trim());
  return parts.filter(Boolean);
}
|
|
|
|
/**
 * Pack dialogue turns into consecutive segments whose combined text length
 * stays within `maxChars`.
 *
 * Any turn longer than the limit is first broken up with `splitLongText`; the
 * resulting pieces keep the voice of the turn they came from. Each segment is
 * later sent to ElevenLabs as one request, then all segment MP3s are stitched
 * together.
 *
 * @param turns - Turns in playback order.
 * @param maxChars - Maximum combined text length per segment.
 * @returns Segments in order; a segment always holds at least one turn.
 */
export function segmentTurns(turns: DialogueTurn[], maxChars: number): AudioSegment[] {
  // Step 1: replace each turn by one or more pieces that individually fit.
  const pieces = turns.flatMap((turn) =>
    splitLongText(turn.text, maxChars).map(
      (text): DialogueTurn => ({ text, voiceId: turn.voiceId })
    )
  );

  // Step 2: greedily fill segments; start a new one when the next piece
  // would push the open segment past the limit.
  const result: AudioSegment[] = [];
  let open: AudioSegment | null = null;
  for (const piece of pieces) {
    const size = piece.text.length;
    if (open === null || open.characters + size > maxChars) {
      open = { turns: [], characters: 0, uniqueVoices: 0 };
      result.push(open);
    }
    open.turns.push(piece);
    open.characters += size;
  }

  // Step 3: now that each segment is complete, count its distinct voices.
  for (const segment of result) {
    const voices = new Set(segment.turns.map((t) => t.voiceId));
    segment.uniqueVoices = voices.size;
  }
  return result;
}
|
|
|
|
/**
 * Convenience: full script → audio segments.
 *
 * Resolves voices with `flattenTurns`, then groups the result with
 * `segmentTurns`.
 *
 * @param script - Script to convert.
 * @param voiceMap - Speaker key → voice ID.
 * @param fallbackVoiceId - Voice for speakers missing from `voiceMap`.
 * @param maxChars - Maximum combined text length per segment.
 * @returns The script's turns grouped into segments.
 */
export function segmentScript(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string,
  maxChars: number
): AudioSegment[] {
  const voicedTurns = flattenTurns(script, voiceMap, fallbackVoiceId);
  return segmentTurns(voicedTurns, maxChars);
}
|
|
|
|
/**
 * Total characters across a script (for cost/limit estimation).
 *
 * Adds up the raw `text.length` of every turn in every section; text is
 * counted exactly as stored, without trimming.
 *
 * @param sections - Sections whose turns are counted.
 * @returns The summed text length, or 0 when there are no turns.
 */
export function totalCharacters(sections: ScriptSection[]): number {
  let total = 0;
  for (const section of sections) {
    for (const turn of section.turns) {
      total += turn.text.length;
    }
  }
  return total;
}
|