// lib/ai/types.ts

/**
 * Provider abstraction for the three AI capabilities. Each capability has a thin
 * interface so the underlying model (GPT-4, ElevenLabs, DALL·E) can be swapped
 * via the registry in providers/index.ts without touching call sites.
 */

/**
 * Episode format selector: exactly one of three string literals.
 *
 * NOTE(review): the names suggest a single host, a host-plus-guest
 * conversation, and several hosts respectively — confirm against the code
 * that branches on this value.
 */
export type EpisodeFormat = "SOLO" | "INTERVIEW" | "MULTI_HOST";

/** Pairs a stable speaker key with a display name. */
export interface SpeakerRole {
  /** Stable key referenced by script turns, e.g. "host", "guest", "cohost". */
  speakerKey: string;
  /**
   * Display name for this speaker.
   * NOTE(review): where this name is surfaced (UI, prompt, script text) is not
   * visible in this file — confirm with call sites.
   */
  displayName: string;
}

/**
 * Settings describing one episode. Passed as the `config` argument to both
 * `ScriptProvider.generate` and `ScriptProvider.regenerateSection`.
 */
export interface EpisodeConfig {
  /**
   * Optional episode title.
   * NOTE(review): how a missing title is handled is up to the provider and is
   * not visible here.
   */
  title?: string;
  /** Episode topic, as a free-form string. */
  topic: string;
  /** Episode tone, as a free-form string (this type enforces no fixed set of values). */
  tone: string;
  /** One of the `EpisodeFormat` literals. */
  format: EpisodeFormat;
  /** ISO language code, e.g. "en", "es". */
  language: string;
  /**
   * Target episode length.
   * NOTE(review): the `Min` suffix presumably means minutes — confirm.
   */
  targetLengthMin: number;
  /** Optional free-form description of the intended audience. */
  audience?: string;
  /**
   * Speaker roles for the episode.
   * NOTE(review): presumably every `ScriptTurn.speakerKey` should match one of
   * these roles' `speakerKey`; nothing in this file enforces that.
   */
  speakers: SpeakerRole[];
}

// ─────────────── Script ───────────────

/** One turn of a script: a speaker key plus the text attributed to it. */
export interface ScriptTurn {
  /**
   * Key identifying the speaker of this turn. `SpeakerRole.speakerKey` is
   * documented as the key that script turns reference.
   */
  speakerKey: string;
  /** Text of the turn. */
  text: string;
}

/** A titled group of turns within a script. */
export interface ScriptSection {
  /**
   * Section identifier.
   * NOTE(review): presumably the value passed as `sectionId` to
   * `ScriptProvider.regenerateSection`; uniqueness within a script is assumed
   * but not enforced by this type.
   */
  id: string;
  /** Section title. */
  title: string;
  /**
   * Turns belonging to this section.
   * NOTE(review): array order is presumably the spoken order — confirm.
   */
  turns: ScriptTurn[];
}

/** A complete script: a title plus a list of sections. */
export interface StructuredScript {
  /** Script title (required here, unlike the optional `EpisodeConfig.title`). */
  title: string;
  /**
   * Sections of the script.
   * NOTE(review): array order is presumably the playback order — confirm.
   */
  sections: ScriptSection[];
}

/**
 * Token counts returned alongside every `ScriptProvider` result.
 *
 * NOTE(review): presumably the prompt and completion token counts reported by
 * the underlying model for a single call — confirm with the provider
 * implementation.
 */
export interface TokenUsage {
  /** Number of input tokens. */
  inputTokens: number;
  /** Number of output tokens. */
  outputTokens: number;
}

/**
 * Script-generation capability: produces a whole script, or a replacement for
 * one section of an existing script.
 */
export interface ScriptProvider {
  /** Model identifier string; its format is not constrained by this type. */
  readonly model: string;
  /**
   * Generates a script for an episode.
   *
   * @param config Episode settings.
   * @returns A promise resolving to the generated script and the token usage
   *   reported for the call.
   */
  generate(config: EpisodeConfig): Promise<{ script: StructuredScript; usage: TokenUsage }>;
  /**
   * Regenerates a single section of an existing script.
   *
   * @param config Episode settings.
   * @param script The existing script.
   * @param sectionId Identifier of the section to regenerate.
   *   NOTE(review): presumably a `ScriptSection.id` present in `script`; the
   *   behavior for an unknown id is not specified by this interface.
   * @returns A promise resolving to the new section and the token usage for
   *   the call. Only the section is returned; the result type does not include
   *   an updated `StructuredScript`.
   */
  regenerateSection(
    config: EpisodeConfig,
    script: StructuredScript,
    sectionId: string
  ): Promise<{ section: ScriptSection; usage: TokenUsage }>;
}

// ─────────────── Audio ───────────────

/** One entry of the voice catalog returned by `AudioProvider.listVoices`. */
export interface Voice {
  /**
   * Voice identifier.
   * NOTE(review): presumably the value passed as `voiceId` to the synthesis
   * methods and in `DialogueTurn.voiceId` — confirm.
   */
  id: string;
  /** Voice name. */
  name: string;
  /** Optional gender label; when present, one of the three literals. */
  gender?: "male" | "female" | "neutral";
  /** Optional accent label (free-form string). */
  accent?: string;
  /** Optional free-form description of the voice. */
  description?: string;
  /**
   * Optional preview URL.
   * NOTE(review): presumably points at a short audio sample — confirm.
   */
  previewUrl?: string;
}

/**
 * One line of multi-voice dialogue. `AudioProvider.synthesizeDialogue` takes
 * an array of these.
 */
export interface DialogueTurn {
  /** Text of this line. */
  text: string;
  /**
   * Voice for this line.
   * NOTE(review): presumably a `Voice.id` — confirm.
   */
  voiceId: string;
}

/**
 * Speech-synthesis capability: single-voice synthesis, multi-voice dialogue
 * synthesis, a voice catalog lookup, and the provider's per-request character
 * cap.
 */
export interface AudioProvider {
  /**
   * Synthesize a single voice reading (used for SOLO and as a fallback).
   *
   * @param text Text to synthesize.
   * @param voiceId Voice to use. NOTE(review): presumably a `Voice.id` from
   *   `listVoices()` — confirm.
   * @param opts Optional settings. `language` is an optional string;
   *   NOTE(review): presumably the same ISO code as `EpisodeConfig.language`.
   * @returns A promise resolving to the audio as a `Buffer` plus a character
   *   count. The audio encoding is not specified by this type.
   *   NOTE(review): `characters` is presumably the number of characters
   *   synthesized for this request — confirm.
   */
  synthesizeSpeech(
    text: string,
    voiceId: string,
    opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }>;
  /**
   * Synthesize a multi-voice dialogue chunk (≤ provider char limit, ≤10 voices).
   *
   * @param turns Dialogue lines, each carrying its own `voiceId`.
   * @param opts Optional settings; same shape as for `synthesizeSpeech`.
   * @returns A promise resolving to the same `{ audio, characters }` shape as
   *   `synthesizeSpeech`.
   */
  synthesizeDialogue(
    turns: DialogueTurn[],
    opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }>;
  /** Live voice catalog for the account. */
  listVoices(): Promise<Voice[]>;
  /** Hard cap on characters per synthesis request (drives segmentation). */
  readonly maxCharsPerRequest: number;
}

// ─────────────── Art ───────────────

/** Cover-art capability: generates a cover from a text prompt. */
export interface ArtProvider {
  /** Model identifier string; its format is not constrained by this type. */
  readonly model: string;
  /**
   * Generates a cover from a prompt.
   *
   * @param prompt Text prompt.
   * @param opts Optional settings. `size` currently admits only the literal
   *   "1024x1024".
   * @returns A promise resolving to the result as a `Buffer` plus an optional
   *   `revisedPrompt`. The image encoding is not specified by this type.
   *   NOTE(review): `revisedPrompt` is presumably the prompt as rewritten by
   *   the image model, when the provider reports one — confirm.
   */
  generateCover(
    prompt: string,
    opts?: { size?: "1024x1024" }
  ): Promise<{ data: Buffer; revisedPrompt?: string }>;
}
// Initial commit: PodcastYes — AI podcast platform (2026-06-07 03:58:32 -04:00)