Initial commit: PodcastYes — AI podcast platform
This commit is contained in:
@@ -0,0 +1,110 @@
|
||||
import type { DialogueTurn, ScriptSection, StructuredScript } from "../types";
|
||||
|
||||
/**
 * One batch of dialogue turns that is processed as a single unit of audio.
 * Produced by the segmentation helpers in this module.
 */
export interface AudioSegment {
  /** The dialogue turns that belong to this segment, in script order. */
  turns: DialogueTurn[];
  /** Sum of the text lengths of all turns in this segment. */
  characters: number;
  /** Distinct voices used in this segment (drives speech vs dialogue choice). */
  uniqueVoices: number;
}
|
||||
|
||||
/**
 * Flatten a structured script into one ordered list of voiced dialogue turns.
 *
 * Each turn's text is trimmed; turns whose text is empty after trimming are
 * dropped. The voice is looked up by the turn's `speakerKey` in `voiceMap`,
 * falling back to `fallbackVoiceId` when the speaker has no entry.
 *
 * @param script - Script whose sections and turns are walked in order.
 * @param voiceMap - Speaker key → voice id. Only the map's own entries are
 *   consulted.
 * @param fallbackVoiceId - Voice used for speakers missing from `voiceMap`.
 * @returns The non-empty turns, in script order, each with a resolved voice.
 */
export function flattenTurns(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string
): DialogueTurn[] {
  const turns: DialogueTurn[] = [];
  for (const section of script.sections) {
    for (const turn of section.turns) {
      const text = turn.text.trim();
      if (!text) continue;

      // Only honour the map's own keys. A plain `voiceMap[key]` would resolve
      // speaker keys such as "toString" or "constructor" to members inherited
      // from Object.prototype and yield a non-string voice id.
      const hasOwnVoice = Object.prototype.hasOwnProperty.call(voiceMap, turn.speakerKey);
      const mappedVoice = hasOwnVoice ? voiceMap[turn.speakerKey] : undefined;

      turns.push({ text, voiceId: mappedVoice ?? fallbackVoiceId });
    }
  }
  return turns;
}
|
||||
|
||||
/**
 * Matches either a sentence (any text up to and including its terminal
 * punctuation plus trailing whitespace) or a final unterminated remainder.
 * Every character of the input belongs to exactly one match, so nothing is
 * dropped — including punctuation runs with no preceding text such as "...".
 */
const SENTENCE_PATTERN = /[^.!?]*[.!?]+\s*|[^.!?]+/g;

/** Matches a single whitespace character (hoisted out of the wrap loop). */
const WHITESPACE_CHAR = /\s/;

/**
 * Break `text` into trimmed pieces of at most `limit` UTF-16 code units.
 * Breaks at the last whitespace inside each window when there is one, and
 * falls back to a hard cut only for a single unbroken run longer than `limit`.
 */
function hardWrap(text: string, limit: number): string[] {
  const pieces: string[] = [];
  let rest = text.trim();

  while (rest.length > limit) {
    let cut = limit;

    if (!WHITESPACE_CHAR.test(rest.charAt(limit))) {
      // The window ends mid-word: back up to the last whitespace inside it.
      // Index 0 is never whitespace because `rest` is always left-trimmed.
      for (let i = limit - 1; i > 0; i--) {
        if (WHITESPACE_CHAR.test(rest.charAt(i))) {
          cut = i;
          break;
        }
      }
    }

    // Never cut between the two halves of a surrogate pair (e.g. an emoji);
    // with a limit of 1 there is no room to avoid it, so `cut > 1` guards progress.
    const before = rest.charCodeAt(cut - 1);
    const after = rest.charCodeAt(cut);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (cut > 1 && splitsPair) cut -= 1;

    pieces.push(rest.slice(0, cut).trimEnd());
    rest = rest.slice(cut).trimStart();
  }

  if (rest) pieces.push(rest);
  return pieces;
}

/**
 * Split text longer than `maxChars` into parts that each fit within the limit.
 *
 * Text that already fits is returned unchanged as a single element. Otherwise
 * the text is split at sentence boundaries (`.`, `!`, `?`) and consecutive
 * sentences are packed together while they fit. A single sentence that is
 * itself too long is wrapped on whitespace, with a hard cut only for unbroken
 * runs longer than the limit. Every returned part is trimmed and non-empty.
 *
 * @param text - The text to split.
 * @param maxChars - Maximum length of each part, in UTF-16 code units.
 *   Fractional values are rounded down.
 * @returns The parts, in original order.
 * @throws RangeError if the text needs splitting and `maxChars` is below 1 or NaN.
 */
export function splitLongText(text: string, maxChars: number): string[] {
  if (text.length <= maxChars) return [text];

  // A limit below 1 can never be satisfied; without this guard the wrap loop
  // would not advance and would never terminate.
  const limit = Math.floor(maxChars);
  if (!(limit >= 1)) {
    throw new RangeError(`maxChars must be at least 1, received ${maxChars}`);
  }

  const sentences = text.match(SENTENCE_PATTERN) ?? [text];
  const parts: string[] = [];
  let current = "";

  for (const sentence of sentences) {
    // Measure trimmed lengths throughout: parts are trimmed before being
    // emitted, so surrounding whitespace must not count against the limit.
    if (sentence.trim().length > limit) {
      // A single very long sentence — flush what we have, then wrap it.
      if (current.trim()) parts.push(current.trim());
      current = "";
      for (const piece of hardWrap(sentence, limit)) parts.push(piece);
      continue;
    }

    const candidate = current + sentence;
    if (candidate.trim().length > limit) {
      parts.push(current.trim());
      current = sentence;
    } else {
      current = candidate;
    }
  }

  if (current.trim()) parts.push(current.trim());
  return parts.filter(Boolean);
}
|
||||
|
||||
/**
 * Pack dialogue turns into consecutive segments whose combined text length
 * stays within `maxChars`.
 *
 * Any turn whose text exceeds the limit is first broken into several sub-turns
 * that keep the original voice. Each resulting segment is later sent to
 * ElevenLabs as one request, then all segment MP3s are stitched together.
 *
 * @param turns - Voiced turns in playback order.
 * @param maxChars - Character budget per segment.
 * @returns Segments in playback order; empty when there are no turns.
 */
export function segmentTurns(turns: DialogueTurn[], maxChars: number): AudioSegment[] {
  // Step 1: replace every oversized turn with same-voice pieces that fit.
  const pieces = turns.flatMap((turn) =>
    splitLongText(turn.text, maxChars).map(
      (text): DialogueTurn => ({ text, voiceId: turn.voiceId })
    )
  );

  // Step 2: greedily fill segments, closing one whenever the next piece would overflow it.
  const result: AudioSegment[] = [];
  let pending: DialogueTurn[] = [];
  let pendingChars = 0;

  const closeSegment = (): void => {
    if (pending.length === 0) return;
    const voices = new Set<DialogueTurn["voiceId"]>();
    for (const entry of pending) voices.add(entry.voiceId);
    result.push({ turns: pending, characters: pendingChars, uniqueVoices: voices.size });
    pending = [];
    pendingChars = 0;
  };

  pieces.forEach((piece) => {
    const size = piece.text.length;
    // An empty segment always accepts the piece, even if it alone exceeds the budget.
    if (pending.length > 0 && pendingChars + size > maxChars) closeSegment();
    pending.push(piece);
    pendingChars += size;
  });
  closeSegment();

  return result;
}
|
||||
|
||||
/**
 * Convenience wrapper: turn a full script straight into audio segments.
 *
 * Resolves each turn's voice via `flattenTurns`, then groups the result with
 * `segmentTurns`.
 *
 * @param script - The structured script to convert.
 * @param voiceMap - Speaker key → voice id.
 * @param fallbackVoiceId - Voice for speakers missing from `voiceMap`.
 * @param maxChars - Character budget per segment.
 * @returns Audio segments in script order.
 */
export function segmentScript(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string,
  maxChars: number
): AudioSegment[] {
  const voicedTurns = flattenTurns(script, voiceMap, fallbackVoiceId);
  return segmentTurns(voicedTurns, maxChars);
}
|
||||
|
||||
/**
 * Count the characters of every turn in the given sections (for cost/limit
 * estimation). Text is measured as-is, without trimming.
 *
 * @param sections - Script sections whose turns are counted.
 * @returns Sum of `text.length` over all turns; 0 for no sections.
 */
export function totalCharacters(sections: ScriptSection[]): number {
  let total = 0;
  for (const section of sections) {
    for (const turn of section.turns) {
      total += turn.text.length;
    }
  }
  return total;
}
|
||||
Reference in New Issue
Block a user