Files
2026-06-07 03:58:32 -04:00

104 lines
2.7 KiB
TypeScript

/**
* Provider abstraction for the three AI capabilities. Each capability has a thin
* interface so the underlying model (GPT-4, ElevenLabs, DALL·E) can be swapped
* via the registry in providers/index.ts without touching call sites.
*/
/**
 * Closed set of format tags an episode configuration can carry.
 * The tags are plain string literals, so they survive JSON round-trips as-is.
 */
export type EpisodeFormat =
  | "SOLO"
  | "INTERVIEW"
  | "MULTI_HOST";
/** Describes one speaker taking part in an episode. */
export interface SpeakerRole {
  /**
   * Stable identifier that script turns use to point at this speaker,
   * for example "host", "guest" or "cohost".
   */
  speakerKey: string;
  /** Name used when the speaker is displayed (presumably user-facing; confirm with the UI). */
  displayName: string;
}
/**
 * Settings that describe the episode to be produced. This is the input handed
 * to the script provider's `generate` and `regenerateSection` methods.
 */
export interface EpisodeConfig {
  /** Optional episode title. */
  title?: string;
  /** Subject of the episode. */
  topic: string;
  /** Tone descriptor; a free-form string, not constrained by this type. */
  tone: string;
  /** Which of the episode formats this episode uses. */
  format: EpisodeFormat;
  /** ISO language code such as "en" or "es". */
  language: string;
  /** Target episode length (presumably in minutes, going by the name; confirm with callers). */
  targetLengthMin: number;
  /** Optional description of the intended audience. */
  audience?: string;
  /** Speakers taking part in the episode. */
  speakers: SpeakerRole[];
}
// ─────────────── Script ───────────────
/** A single utterance in a script: some text attributed to one speaker. */
export interface ScriptTurn {
  /** Key of the speaker delivering this turn (speaker roles expose a matching `speakerKey`). */
  speakerKey: string;
  /** The text of the turn. */
  text: string;
}
/** A titled group of turns inside a script. */
export interface ScriptSection {
  /**
   * Identifier of the section. Presumably the value passed as `sectionId`
   * when a single section is regenerated; confirm with the provider code.
   */
  id: string;
  /** Title of the section. */
  title: string;
  /** The turns that make up the section. */
  turns: ScriptTurn[];
}
/** A complete script: a title plus its sections. */
export interface StructuredScript {
  /** Title of the script. */
  title: string;
  /** Sections the script is made of. */
  sections: ScriptSection[];
}
/** Token counts returned alongside the results of script generation. */
export interface TokenUsage {
  /** Tokens counted on the input side (presumably the prompt; confirm with the provider). */
  inputTokens: number;
  /** Tokens counted on the output side (presumably the completion; confirm with the provider). */
  outputTokens: number;
}
/** Contract implemented by a script-generation backend. */
export interface ScriptProvider {
  /** Name of the model behind this provider. */
  readonly model: string;

  /**
   * Generates a script from an episode configuration.
   *
   * @param config - Settings of the episode to write.
   * @returns A promise for the script and the token usage reported with it.
   */
  generate(config: EpisodeConfig): Promise<{
    script: StructuredScript;
    usage: TokenUsage;
  }>;

  /**
   * Regenerates one section of an existing script.
   *
   * @param config - Settings of the episode the script belongs to.
   * @param script - The script that contains the section.
   * @param sectionId - Identifier of the section to regenerate.
   * @returns A promise for the new section and the token usage reported with it.
   */
  regenerateSection(
    config: EpisodeConfig,
    script: StructuredScript,
    sectionId: string
  ): Promise<{
    section: ScriptSection;
    usage: TokenUsage;
  }>;
}
// ─────────────── Audio ───────────────
/** One voice as reported by an audio provider's voice listing. */
export interface Voice {
  /** Identifier of the voice (presumably what synthesis calls take as `voiceId`; confirm). */
  id: string;
  /** Name of the voice. */
  name: string;
  /** Optional gender label; restricted to the three literals below. */
  gender?: "male" | "female" | "neutral";
  /** Optional accent label (free-form string). */
  accent?: string;
  /** Optional free-form description. */
  description?: string;
  /** Optional URL (presumably of an audio sample for the voice; confirm with the provider). */
  previewUrl?: string;
}
/** A single line of a multi-voice dialogue, paired with the voice that reads it. */
export interface DialogueTurn {
  /** Text of the line. */
  text: string;
  /** Identifier of the voice that speaks the line. */
  voiceId: string;
}
/** Contract implemented by a speech-synthesis backend. */
export interface AudioProvider {
  /** Hard upper bound on characters per synthesis request; segmentation is driven by it. */
  readonly maxCharsPerRequest: number;

  /**
   * Synthesizes a reading in a single voice. Used for SOLO episodes and as a fallback.
   *
   * @param text - Text to read.
   * @param voiceId - Identifier of the voice to use.
   * @param opts - Optional settings; `language` is an optional language hint.
   * @returns A promise for the audio bytes and a character count.
   */
  synthesizeSpeech(
    text: string,
    voiceId: string,
    opts?: { language?: string }
  ): Promise<{
    audio: Buffer;
    characters: number;
  }>;

  /**
   * Synthesizes one chunk of multi-voice dialogue. A chunk must stay within the
   * provider's character limit and use at most 10 voices.
   *
   * @param turns - The dialogue lines, each with its voice.
   * @param opts - Optional settings; `language` is an optional language hint.
   * @returns A promise for the audio bytes and a character count.
   */
  synthesizeDialogue(
    turns: DialogueTurn[],
    opts?: { language?: string }
  ): Promise<{
    audio: Buffer;
    characters: number;
  }>;

  /** Fetches the live voice catalog for the account. */
  listVoices(): Promise<Voice[]>;
}
// ─────────────── Art ───────────────
/** Contract implemented by a cover-art generation backend. */
export interface ArtProvider {
  /** Name of the model behind this provider. */
  readonly model: string;

  /**
   * Generates cover art from a text prompt.
   *
   * @param prompt - Text prompt describing the cover.
   * @param opts - Optional settings; `size` currently admits only "1024x1024".
   * @returns A promise for the image bytes and, optionally, a revised prompt
   *   (presumably the prompt as rewritten by the provider; confirm).
   */
  generateCover(
    prompt: string,
    opts?: { size?: "1024x1024" }
  ): Promise<{
    data: Buffer;
    revisedPrompt?: string;
  }>;
}