104 lines
2.7 KiB
TypeScript
104 lines
2.7 KiB
TypeScript
|
|
/**
 * Provider abstraction for the three AI capabilities. Each capability has a thin
 * interface so the underlying model (GPT-4, ElevenLabs, DALL·E) can be swapped
 * via the registry in providers/index.ts without touching call sites.
 */
|
||
|
|
|
||
|
|
/** The episode layouts a configuration can request. */
export type EpisodeFormat =
  | "SOLO"
  | "INTERVIEW"
  | "MULTI_HOST";
|
||
|
|
|
||
|
|
/** A speaker taking part in an episode (listed in `EpisodeConfig.speakers`). */
export interface SpeakerRole {
  /** Stable key referenced by script turns, e.g. "host", "guest", "cohost". */
  speakerKey: string;
  /** NOTE(review): presumably the human-readable name for this speaker — confirm with callers. */
  displayName: string;
}
|
||
|
|
|
||
|
|
/**
 * Description of the episode to produce. This is the input handed to
 * `ScriptProvider.generate` and `ScriptProvider.regenerateSection`.
 */
export interface EpisodeConfig {
  /** Optional episode title; may be omitted. */
  title?: string;
  topic: string;
  tone: string;
  format: EpisodeFormat;
  /** ISO language code, e.g. "en", "es". */
  language: string;
  /** NOTE(review): presumably the target episode length in minutes — confirm the unit. */
  targetLengthMin: number;
  /** Optional description of the intended audience. */
  audience?: string;
  /** Speaker roles for the episode; script turns refer to them by `speakerKey`. */
  speakers: SpeakerRole[];
}
|
||
|
|
|
||
|
|
// ─────────────── Script ───────────────
|
||
|
|
|
||
|
|
/** One turn of a script: a piece of text attributed to a speaker. */
export interface ScriptTurn {
  /** Key of the speaker for this turn (`SpeakerRole.speakerKey` is documented as referenced by script turns). */
  speakerKey: string;
  /** The text of the turn. */
  text: string;
}
|
||
|
|
|
||
|
|
/** A titled section of a script, holding an ordered list of turns. */
export interface ScriptSection {
  /** NOTE(review): presumably the value passed as `sectionId` to `ScriptProvider.regenerateSection` — confirm. */
  id: string;
  title: string;
  turns: ScriptTurn[];
}
|
||
|
|
|
||
|
|
/** A complete script: a title plus its sections. */
export interface StructuredScript {
  title: string;
  sections: ScriptSection[];
}
|
||
|
|
|
||
|
|
/** Token counts returned alongside each `ScriptProvider` result. */
export interface TokenUsage {
  inputTokens: number;
  outputTokens: number;
}
|
||
|
|
|
||
|
|
/**
 * Script-writing capability. Implementations produce a whole structured script
 * from an episode configuration, or a replacement for a single section.
 */
export interface ScriptProvider {
  /** Identifier of the model backing this provider. */
  readonly model: string;

  /**
   * Produce a full script for the given episode configuration.
   *
   * @param config - The episode settings to write for.
   * @returns The generated script together with the token usage of the call.
   */
  generate(config: EpisodeConfig): Promise<{ script: StructuredScript; usage: TokenUsage }>;

  /**
   * Produce a new version of one section of an existing script.
   *
   * @param config - The episode settings the script belongs to.
   * @param script - The current script.
   * @param sectionId - Identifies the section to regenerate
   *   (NOTE(review): presumably a `ScriptSection.id` from `script` — confirm).
   * @returns The regenerated section together with the token usage of the call.
   */
  regenerateSection(
    config: EpisodeConfig,
    script: StructuredScript,
    sectionId: string
  ): Promise<{ section: ScriptSection; usage: TokenUsage }>;
}
|
||
|
|
|
||
|
|
// ─────────────── Audio ───────────────
|
||
|
|
|
||
|
|
/** An entry of the voice catalog returned by `AudioProvider.listVoices`. */
export interface Voice {
  /** NOTE(review): presumably the value used as `voiceId` in synthesis calls — confirm. */
  id: string;
  name: string;
  /** Optional gender label; one of the three listed values when present. */
  gender?: "male" | "female" | "neutral";
  /** Optional free-form accent label. */
  accent?: string;
  /** Optional free-form description of the voice. */
  description?: string;
  /** NOTE(review): presumably a URL to an audio sample of the voice — confirm. */
  previewUrl?: string;
}
|
||
|
|
|
||
|
|
/** One line of multi-voice dialogue. */
export interface DialogueTurn {
  /** The text of the line. */
  text: string;
  /** Identifier of the voice for this line. */
  voiceId: string;
}
|
||
|
|
|
||
|
|
/**
 * Speech-synthesis capability: single-voice reading, multi-voice dialogue,
 * and access to the voice catalog.
 */
export interface AudioProvider {
  /**
   * Synthesize a single voice reading (used for SOLO and as a fallback).
   *
   * @param text - The text to read.
   * @param voiceId - Identifier of the voice to use.
   * @param opts - Optional settings; `language` may be supplied.
   * @returns The audio bytes and a character count
   *   (NOTE(review): presumably the characters synthesized — confirm).
   */
  synthesizeSpeech(
    text: string,
    voiceId: string,
    opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }>;

  /**
   * Synthesize a multi-voice dialogue chunk (≤ provider char limit, ≤10 voices).
   *
   * @param turns - The dialogue lines, each with its own voice.
   * @param opts - Optional settings; `language` may be supplied.
   * @returns The audio bytes and a character count
   *   (NOTE(review): presumably the characters synthesized — confirm).
   */
  synthesizeDialogue(
    turns: DialogueTurn[],
    opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }>;

  /** Live voice catalog for the account. */
  listVoices(): Promise<Voice[]>;

  /** Hard cap on characters per synthesis request (drives segmentation). */
  readonly maxCharsPerRequest: number;
}
|
||
|
|
|
||
|
|
// ─────────────── Art ───────────────
|
||
|
|
|
||
|
|
/** Cover-art capability: turns a text prompt into image data. */
export interface ArtProvider {
  /** Identifier of the model backing this provider. */
  readonly model: string;

  /**
   * Generate a cover image from a prompt.
   *
   * @param prompt - Text describing the desired image.
   * @param opts - Optional settings; the only `size` accepted by the type is "1024x1024".
   * @returns The image bytes, plus `revisedPrompt` when present
   *   (NOTE(review): presumably the prompt as rewritten by the model — confirm).
   */
  generateCover(
    prompt: string,
    opts?: { size?: "1024x1024" }
  ): Promise<{ data: Buffer; revisedPrompt?: string }>;
}
|