// podcastdistributiona/lib/ai/pipeline/segment.ts

import type { DialogueTurn, ScriptSection, StructuredScript } from "../types";

/**
 * One group of dialogue turns, as produced by `segmentTurns` in this module.
 */
export interface AudioSegment {
  /** The turns in this segment, in script order. */
  turns: DialogueTurn[];
  /** Sum of `text.length` over `turns` (the turn text only). */
  characters: number;
  /** Distinct voices used in this segment (drives speech vs dialogue choice). */
  uniqueVoices: number;
}

/**
 * Flatten every section of `script` into a single ordered list of voiced turns.
 *
 * Each turn's text is trimmed; turns that are empty after trimming are omitted.
 * The voice is looked up in `voiceMap` by the turn's `speakerKey`, and
 * `fallbackVoiceId` is used when the lookup yields `null`/`undefined`.
 */
export function flattenTurns(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string
): DialogueTurn[] {
  return script.sections.flatMap((section) =>
    section.turns.flatMap((raw): DialogueTurn[] => {
      const spoken = raw.text.trim();
      if (spoken.length === 0) return [];
      const voiceId = voiceMap[raw.speakerKey] ?? fallbackVoiceId;
      return [{ text: spoken, voiceId }];
    })
  );
}

/**
 * Break one over-long run of text into pieces of at most `limit` characters.
 * Breaks at the last whitespace that fits; only when a single word exceeds
 * `limit` is it cut mid-word. `limit` must be an integer >= 1.
 */
function hardWrapOnWhitespace(text: string, limit: number): string[] {
  const lastWhitespace = /\s\S*$/;
  const pieces: string[] = [];
  let rest = text.trim();
  while (rest.length > limit) {
    // Look one character past the limit so whitespace sitting exactly on the
    // boundary still counts as a break point.
    let cut = rest.slice(0, limit + 1).search(lastWhitespace);
    if (cut <= 0) {
      // No whitespace in the window: cut at the limit, but avoid separating a
      // UTF-16 surrogate pair when the limit leaves room to back off by one.
      cut = limit;
      const before = rest.charCodeAt(cut - 1);
      const after = rest.charCodeAt(cut);
      const splitsPair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (cut > 1 && splitsPair) cut -= 1;
    }
    pieces.push(rest.slice(0, cut).trimEnd());
    rest = rest.slice(cut).trimStart();
  }
  if (rest) pieces.push(rest);
  return pieces;
}

/**
 * Split text longer than `maxChars` into pieces that each fit within it.
 *
 * Text that already fits is returned unchanged as a single element. Otherwise
 * the text is cut at sentence boundaries (`.`, `!`, `?` plus trailing
 * whitespace) and consecutive sentences are packed together while they fit.
 * A single sentence longer than the limit is wrapped on whitespace, and cut
 * mid-word only when one word alone exceeds the limit.
 *
 * @param text - The text to split.
 * @param maxChars - Maximum length (UTF-16 code units) of each returned piece.
 * @returns Trimmed, non-empty pieces in original order (or `[text]` untouched
 *   when no split is needed).
 * @throws RangeError if the text needs splitting and `maxChars` is NaN or
 *   less than 1 (no piece could satisfy such a limit).
 */
export function splitLongText(text: string, maxChars: number): string[] {
  if (text.length <= maxChars) return [text];
  if (!(maxChars >= 1)) {
    throw new RangeError(`maxChars must be at least 1, got ${maxChars}`);
  }
  // Lengths are integers, so a fractional limit behaves like its floor.
  const limit = Math.floor(maxChars);

  // First alternative: a sentence ending in punctuation (possibly punctuation
  // only, e.g. a leading "..."). Second: trailing text with no terminator.
  // Together they cover every character, so nothing is dropped.
  const sentences = text.match(/[^.!?]*[.!?]+\s*|[^.!?]+/g) ?? [text];

  const parts: string[] = [];
  let current = "";
  for (const sentence of sentences) {
    if (sentence.length > limit) {
      // Flush what has been packed so far so ordering is preserved, then wrap
      // the oversized sentence on its own.
      if (current.trim()) parts.push(current.trim());
      current = "";
      for (const piece of hardWrapOnWhitespace(sentence, limit)) {
        parts.push(piece);
      }
      continue;
    }
    if ((current + sentence).length > limit) {
      parts.push(current.trim());
      current = sentence;
    } else {
      current += sentence;
    }
  }
  if (current.trim()) parts.push(current.trim());
  return parts.filter(Boolean);
}

/**
 * Pack dialogue turns into consecutive segments whose combined text length
 * stays within `maxChars`.
 *
 * Any turn longer than the limit is first broken up with `splitLongText`; the
 * resulting pieces keep the original turn's voice. Pieces are then appended to
 * the current segment in order, and a new segment is started whenever adding
 * the next piece would push a non-empty segment over the limit. Each segment
 * is later sent to ElevenLabs as one request, then all segment MP3s are
 * stitched together.
 */
export function segmentTurns(turns: DialogueTurn[], maxChars: number): AudioSegment[] {
  // Step 1: replace oversized turns by several same-voice sub-turns.
  const pieces: DialogueTurn[] = turns.flatMap(({ text, voiceId }) =>
    splitLongText(text, maxChars).map((chunk) => ({ text: chunk, voiceId }))
  );

  // Step 2: greedily fill segments in order.
  const result: AudioSegment[] = [];
  let pending: DialogueTurn[] = [];
  let pendingChars = 0;

  // Close the segment being built (if any) and start a fresh one.
  const seal = (): void => {
    if (pending.length > 0) {
      const voices = new Set(pending.map((entry) => entry.voiceId));
      result.push({
        turns: pending,
        characters: pendingChars,
        uniqueVoices: voices.size,
      });
    }
    pending = [];
    pendingChars = 0;
  };

  for (const piece of pieces) {
    const size = piece.text.length;
    // An empty segment always accepts the next piece, even if it is oversized.
    const wouldOverflow = pending.length > 0 && pendingChars + size > maxChars;
    if (wouldOverflow) seal();
    pending.push(piece);
    pendingChars += size;
  }
  seal();

  return result;
}

/**
 * One-call helper that turns a whole script into audio segments: the script is
 * flattened into voiced turns with `flattenTurns`, and those turns are grouped
 * with `segmentTurns` under the `maxChars` limit.
 */
export function segmentScript(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string,
  maxChars: number
): AudioSegment[] {
  const voicedTurns = flattenTurns(script, voiceMap, fallbackVoiceId);
  return segmentTurns(voicedTurns, maxChars);
}

/**
 * Total characters across a script (for cost/limit estimation).
 *
 * Each turn contributes the length of its text after trimming, so surrounding
 * whitespace and whitespace-only turns are not counted. This mirrors the
 * trimming `flattenTurns` applies before turns are segmented, keeping the
 * estimate in line with the text that is actually grouped into segments.
 *
 * @param sections - Script sections whose turns should be counted.
 * @returns The summed trimmed text length of every turn; 0 for no sections.
 */
export function totalCharacters(sections: ScriptSection[]): number {
  let total = 0;
  for (const section of sections) {
    for (const turn of section.turns) {
      total += turn.text.trim().length;
    }
  }
  return total;
}