Initial commit: PodcastYes — AI podcast platform

This commit is contained in:
Leon Serfaty
2026-06-07 03:58:32 -04:00
commit 155507f21a
151 changed files with 19826 additions and 0 deletions
+52
View File
@@ -0,0 +1,52 @@
import { prisma } from "@/lib/db";
import type { TokenUsage } from "./types";
// Rough 2026 unit prices (USD). Tune in one place; admin cost dashboards read AiCostLog.
// The two gpt* rates feed scriptCostUsd, elevenPer1kChars feeds audioCostUsd and
// dallePerImage feeds artCostUsd.
const PRICE = {
gptInputPer1k: 0.0025, // USD per 1,000 input (prompt) tokens
gptOutputPer1k: 0.01, // USD per 1,000 output (completion) tokens
elevenPer1kChars: 0.3, // USD per 1,000 synthesized characters
dallePerImage: 0.04, // USD per generated image
};
/**
 * Estimated USD cost of one chat completion.
 *
 * @param usage Input and output token counts reported for the request.
 * @returns Input cost plus output cost, rounded to 4 decimal places.
 */
export function scriptCostUsd(usage: TokenUsage): number {
  const inputCost = (usage.inputTokens / 1000) * PRICE.gptInputPer1k;
  const outputCost = (usage.outputTokens / 1000) * PRICE.gptOutputPer1k;
  return round4(inputCost + outputCost);
}
/**
 * Estimated USD cost of synthesizing `characters` characters of speech,
 * rounded to 4 decimal places.
 */
export function audioCostUsd(characters: number): number {
  const thousands = characters / 1000;
  return round4(thousands * PRICE.elevenPer1kChars);
}
/** Estimated USD cost of generating `images` cover images, rounded to 4 decimal places. */
export function artCostUsd(images: number): number {
  const total = images * PRICE.dallePerImage;
  return round4(total);
}
/** One usage/cost line as accepted by recordCost and written to the AiCostLog table. */
export interface CostEntry {
provider: "openai" | "elevenlabs";
operation: "script" | "audio" | "art" | "repurpose";
// Quantity consumed; the unit depends on the operation (the pipeline passes
// total tokens for scripts, characters for audio and an image count for art).
units: number;
// Cost in USD; recordCost stores it formatted to 4 decimal places.
costUsd: number;
episodeId?: string;
userId?: string;
}
/**
 * Persist one AI usage/cost line for the admin monitoring dashboard.
 *
 * The cost is written as a fixed 4-decimal string; every other field is copied
 * through unchanged.
 */
export async function recordCost(entry: CostEntry): Promise<void> {
  const { provider, operation, units, costUsd, episodeId, userId } = entry;
  await prisma.aiCostLog.create({
    data: {
      provider,
      operation,
      units,
      costUsd: costUsd.toFixed(4),
      episodeId,
      userId,
    },
  });
}
/** Round `n` to 4 decimal places (scale up, Math.round, scale back down). */
function round4(n: number): number {
  const scaled = Math.round(n * 10000);
  return scaled / 10000;
}
+45
View File
@@ -0,0 +1,45 @@
import { spawn } from "node:child_process";
// Executable names/paths for the two tools. Each can be overridden through its
// environment variable; otherwise the bare command name is passed to spawn.
const FFMPEG = process.env.FFMPEG_PATH ?? "ffmpeg";
const FFPROBE = process.env.FFPROBE_PATH ?? "ffprobe";
/**
 * Run ffmpeg with the given args.
 *
 * stdin and stdout are ignored; stderr is captured for diagnostics. Only the
 * last 600 characters of stderr are ever reported, so only that tail is kept in
 * memory instead of buffering everything ffmpeg prints during a long encode.
 *
 * @param args Command-line arguments passed to ffmpeg verbatim.
 * @returns Resolves on exit code 0. Rejects with the tail of stderr on any other
 *   exit code, or with a spawn-failure message if the process cannot be started.
 */
export function runFfmpeg(args: string[]): Promise<void> {
  const stderrTailChars = 600;
  return new Promise((resolve, reject) => {
    const proc = spawn(FFMPEG, args, { stdio: ["ignore", "ignore", "pipe"] });
    let stderrTail = "";
    proc.stderr.on("data", (chunk) => {
      // The tail of (previous tail + new chunk) equals the tail of the whole
      // stream, so the reported message is the same as with a full buffer.
      stderrTail = (stderrTail + chunk.toString()).slice(-stderrTailChars);
    });
    proc.on("error", (err) =>
      reject(new Error(`Failed to spawn ffmpeg (${FFMPEG}): ${err.message}`))
    );
    proc.on("close", (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`ffmpeg exited ${code}: ${stderrTail}`));
      }
    });
  });
}
/**
 * Probe a media file's duration with ffprobe.
 *
 * @param file Path of the file to inspect.
 * @returns The duration rounded to whole seconds, or null when ffprobe cannot be
 *   spawned or its output does not parse as a finite number. Never rejects from
 *   a probe failure.
 */
export function ffprobeDuration(file: string): Promise<number | null> {
  return new Promise((resolve) => {
    const probeArgs = [
      "-v",
      "error",
      "-show_entries",
      "format=duration",
      "-of",
      "default=noprint_wrappers=1:nokey=1",
      file,
    ];
    const child = spawn(FFPROBE, probeArgs);
    const chunks: string[] = [];
    child.stdout.on("data", (chunk) => {
      chunks.push(chunk.toString());
    });
    child.on("error", () => resolve(null));
    child.on("close", () => {
      const seconds = parseFloat(chunks.join("").trim());
      if (Number.isFinite(seconds)) {
        resolve(Math.round(seconds));
      } else {
        resolve(null);
      }
    });
  });
}
+16
View File
@@ -0,0 +1,16 @@
import OpenAI from "openai";
// Module-level cache; stays null until the first openai() call succeeds.
let client: OpenAI | null = null;
/**
 * Lazily-constructed OpenAI client (used for GPT-4 scripts and DALL·E art).
 *
 * The first call reads OPENAI_API_KEY and builds the client; later calls return
 * the same instance.
 *
 * @throws Error when OPENAI_API_KEY is not set.
 */
export function openai(): OpenAI {
  if (client) return client;
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error("OPENAI_API_KEY is not set");
  client = new OpenAI({ apiKey });
  return client;
}
// Model identifiers, each overridable through its environment variable.
export const SCRIPT_MODEL = process.env.OPENAI_SCRIPT_MODEL ?? "gpt-4o";
export const ART_MODEL = process.env.OPENAI_ART_MODEL ?? "dall-e-3";
+228
View File
@@ -0,0 +1,228 @@
import { Prisma } from "@prisma/client";
import { prisma } from "@/lib/db";
import { setEpisodeStatus } from "@/lib/episodes/status";
import { scriptProvider, audioProvider, artProvider } from "@/lib/ai/providers";
import { buildCoverPrompt } from "@/lib/ai/providers/openai-art";
import { segmentScript } from "./segment";
import { stitchMp3 } from "./stitch";
import { storage, assetKey } from "@/lib/storage";
import { recordCost, scriptCostUsd, audioCostUsd, artCostUsd } from "@/lib/ai/cost";
import { incrementUsage } from "@/lib/usage/meter";
import { sendEmail, emailLayout } from "@/lib/email";
import { DEFAULT_VOICE_IDS } from "@/lib/ai/voices";
import type { EpisodeConfig, StructuredScript } from "@/lib/ai/types";
import type { GenerationType } from "@/lib/queue/jobs";
// Episode row together with its speakers and owning user, matching the
// `include` used by loadEpisode.
type EpisodeWithRelations = Prisma.EpisodeGetPayload<{
include: { speakers: true; user: true };
}>;
/**
 * The episode generation pipeline, run by the worker.
 * Stages: script → segment → synthesize → stitch → art → save → meter.
 *
 * `type` selects which stages run:
 * - "full": script, audio and art
 * - "script": script, then audio
 * - "audio": audio only
 * - "art": art only
 *
 * After the selected stages finish, usage is metered for exactly those stages,
 * the episode is marked READY and the owner is emailed. Errors from any stage
 * propagate to the caller; no later step runs.
 */
export async function runEpisodeGeneration(
  episodeId: string,
  type: GenerationType = "full"
): Promise<void> {
  const episode = await loadEpisode(episodeId);
  const config = toConfig(episode);

  const isFull = type === "full";
  const runScript = isFull || type === "script";
  const runAudio = runScript || type === "audio";
  const runArt = isFull || type === "art";

  if (runScript) await generateScript(episode, config);
  if (runAudio) await generateAudio(episode);
  if (runArt) await generateArt(episode);

  await setEpisodeStatus(episodeId, "SAVING", { stage: "Finalizing your episode" });
  await meter(episode, { script: runScript, audio: runAudio, art: runArt });
  await setEpisodeStatus(episodeId, "READY", { stage: "Done" });
  await notifyReady(episode);
}
/**
 * Fetch an episode with its speakers and owning user.
 *
 * @throws Error when no episode has the given id.
 */
async function loadEpisode(episodeId: string): Promise<EpisodeWithRelations> {
  const found = await prisma.episode.findUnique({
    where: { id: episodeId },
    include: { speakers: true, user: true },
  });
  if (found) return found;
  throw new Error(`Episode ${episodeId} not found`);
}
/**
 * Project an episode row onto the provider-facing EpisodeConfig.
 *
 * Speakers are reduced to key + display name; an episode without speakers gets
 * a single default "host". A null audience becomes undefined.
 */
function toConfig(episode: EpisodeWithRelations): EpisodeConfig {
  const { title, topic, tone, format, language, targetLengthMin } = episode;

  let speakers = episode.speakers.map(({ speakerKey, displayName }) => ({
    speakerKey,
    displayName,
  }));
  if (speakers.length === 0) {
    speakers = [{ speakerKey: "host", displayName: "Host" }];
  }

  return {
    title,
    topic,
    tone,
    format,
    language,
    targetLengthMin,
    audience: episode.audience ?? undefined,
    speakers,
  };
}
// ─────────────── Stage 1: script ───────────────
/**
 * Generate the episode script and persist it.
 *
 * Sets the episode status to SCRIPTING, asks the script provider for a script,
 * upserts the Script row (bumping `version` when one already exists), adopts
 * the generated title if the episode has none, and records the token cost.
 *
 * @param episode Episode being generated; `title` is updated in place when the
 *   generated title is adopted so later stages see it.
 * @param config Provider-facing configuration derived from the episode.
 */
async function generateScript(episode: EpisodeWithRelations, config: EpisodeConfig) {
  await setEpisodeStatus(episode.id, "SCRIPTING", { stage: "Writing the script" });

  const provider = scriptProvider();
  const { script, usage } = await provider.generate(config);
  const content = script as unknown as Prisma.InputJsonValue;

  await prisma.script.upsert({
    where: { episodeId: episode.id },
    create: { episodeId: episode.id, content, model: provider.model },
    // Refresh `model` on regeneration too; otherwise the row keeps naming
    // whichever model produced the very first version.
    update: { content, model: provider.model, version: { increment: 1 } },
  });

  // Adopt the generated title when the user didn't set one.
  if (!episode.title?.trim() && script.title) {
    await prisma.episode.update({ where: { id: episode.id }, data: { title: script.title } });
    episode.title = script.title;
  }

  await recordCost({
    provider: "openai",
    operation: "script",
    units: usage.inputTokens + usage.outputTokens,
    costUsd: scriptCostUsd(usage),
    episodeId: episode.id,
    userId: episode.userId,
  });
}
// ─────────────── Stages 2-4: segment → synthesize → stitch ───────────────
/**
 * Produce the episode MP3 from the stored script.
 *
 * Loads the Script row, maps each speaker to its voice, splits the script into
 * request-sized segments, synthesizes the segments one after another (single
 * voice → speech endpoint, several voices → dialogue endpoint), stitches the
 * clips, stores the file, upserts the AudioAsset row and records the cost.
 *
 * @throws Error when no script exists yet or the script yields no spoken lines.
 */
async function generateAudio(episode: EpisodeWithRelations) {
  await setEpisodeStatus(episode.id, "SYNTHESIZING", { stage: "Recording the audio" });

  const storedScript = await prisma.script.findUnique({ where: { episodeId: episode.id } });
  if (!storedScript) throw new Error("Cannot synthesize audio before a script exists");
  const script = storedScript.content as unknown as StructuredScript;

  const voiceBySpeaker: Record<string, string> = {};
  for (const speaker of episode.speakers) {
    voiceBySpeaker[speaker.speakerKey] = speaker.elevenVoiceId;
  }
  // Voice for script turns whose speaker key has no mapping.
  const fallbackVoice = episode.speakers[0]?.elevenVoiceId ?? DEFAULT_VOICE_IDS.host;

  const tts = audioProvider();
  const segments = segmentScript(script, voiceBySpeaker, fallbackVoice, tts.maxCharsPerRequest);
  if (segments.length === 0) throw new Error("Script produced no spoken lines");

  // Segments are synthesized sequentially so clips stay in script order.
  const clips: Buffer[] = [];
  let billedChars = 0;
  for (const segment of segments) {
    let result: { audio: Buffer; characters: number };
    if (segment.uniqueVoices <= 1) {
      const joined = segment.turns.map((t) => t.text).join(" ");
      result = await tts.synthesizeSpeech(joined, segment.turns[0].voiceId, {
        language: episode.language,
      });
    } else {
      result = await tts.synthesizeDialogue(segment.turns, { language: episode.language });
    }
    clips.push(result.audio);
    billedChars += result.characters;
  }

  await setEpisodeStatus(episode.id, "STITCHING", { stage: "Mixing the audio" });
  const { data, durationSec } = await stitchMp3(clips);

  const storageKey = assetKey("mp3", `${episode.id}.mp3`);
  await storage().put(storageKey, data, "audio/mpeg");

  // The same column values are written whether the row is created or updated.
  const assetFields = {
    storageKey,
    durationSec,
    sizeBytes: data.length,
    segments: { count: segments.length } as Prisma.InputJsonValue,
  };
  await prisma.audioAsset.upsert({
    where: { episodeId: episode.id },
    create: { episodeId: episode.id, ...assetFields },
    update: assetFields,
  });

  await recordCost({
    provider: "elevenlabs",
    operation: "audio",
    units: billedChars,
    costUsd: audioCostUsd(billedChars),
    episodeId: episode.id,
    userId: episode.userId,
  });
}
// ─────────────── Stage 5: cover art ───────────────
/**
 * Generate and store the episode's cover image.
 *
 * Sets the episode status to ART, builds the prompt from topic, tone and title,
 * stores the returned PNG, upserts the CoverArt row (keeping the provider's
 * revised prompt when one is returned) and records the cost of one image.
 */
async function generateArt(episode: EpisodeWithRelations) {
  await setEpisodeStatus(episode.id, "ART", { stage: "Designing the cover art" });

  const provider = artProvider();
  const prompt = buildCoverPrompt(episode.topic, episode.tone, episode.title);
  const { data, revisedPrompt } = await provider.generateCover(prompt);

  const key = assetKey("art", `${episode.id}.png`);
  await storage().put(key, data, "image/png");

  const storedPrompt = revisedPrompt ?? prompt;
  await prisma.coverArt.upsert({
    where: { episodeId: episode.id },
    create: { episodeId: episode.id, storageKey: key, prompt: storedPrompt, model: provider.model },
    // Refresh `model` on regeneration too; otherwise the row keeps naming
    // whichever model produced the first cover.
    update: { storageKey: key, prompt: storedPrompt, model: provider.model },
  });

  await recordCost({
    provider: "openai",
    operation: "art",
    units: 1,
    costUsd: artCostUsd(1),
    episodeId: episode.id,
    userId: episode.userId,
  });
}
// ─────────────── Stage 7: meter ───────────────
/**
 * Count usage for each stage that actually ran.
 *
 * Usage is attributed to the episode's organization when it has one, otherwise
 * to the owning user. Increments are issued in script, audio, art order.
 */
async function meter(
  episode: EpisodeWithRelations,
  did: { script: boolean; audio: boolean; art: boolean }
) {
  const ownerId = episode.organizationId ?? episode.userId;
  const ownerType = episode.organizationId ? "organization" : "user";

  const stages = ["script", "audio", "art"] as const;
  for (const stage of stages) {
    if (!did[stage]) continue;
    await incrementUsage(ownerId, ownerType, stage);
  }
}
/**
 * Email the episode owner that generation has finished.
 *
 * Best-effort: a failed send is logged and swallowed so it cannot fail the
 * generation job. Links use NEXT_PUBLIC_APP_URL, falling back to localhost.
 */
async function notifyReady(episode: EpisodeWithRelations) {
  const appUrl = process.env.NEXT_PUBLIC_APP_URL ?? "http://localhost:3000";
  const episodeUrl = `${appUrl}/episodes/${episode.id}`;
  try {
    await sendEmail({
      to: episode.user.email,
      subject: `🎙️ "${episode.title}" is ready`,
      html: emailLayout(
        "Your episode is ready",
        // The title is wrapped in a matched pair of curly quotes; the opening
        // one was previously missing.
        `“${episode.title}” has finished generating — script, audio, and cover art are all set.`,
        { label: "Open episode", url: episodeUrl }
      ),
      text: `Your episode "${episode.title}" is ready: ${episodeUrl}`,
    });
  } catch (err) {
    console.error("[notifyReady] email failed (non-fatal)", err);
  }
}
+53
View File
@@ -0,0 +1,53 @@
import { z } from "zod";
import { openai, SCRIPT_MODEL } from "@/lib/ai/openai";
import type { StructuredScript, TokenUsage } from "@/lib/ai/types";
/** Output formats an episode script can be repurposed into. */
export type RepurposeFormat = "blog" | "social_thread" | "newsletter";
/**
 * Per-format instruction that opens the user message sent to the model.
 * Numeric ranges are written with a plain hyphen ("6-10", "3-4"); the separator
 * had been lost, leaving "610 posts" and "34 key takeaways".
 */
const FORMAT_PROMPTS: Record<RepurposeFormat, string> = {
  blog: "Write an engaging, SEO-friendly blog post based on this episode. Include a compelling title and well-structured markdown body with headings and a short conclusion.",
  social_thread:
    "Write a punchy social thread (6-10 posts, numbered) summarizing the episode's best insights. Start with a strong hook. Put the whole thread in the markdown body.",
  newsletter:
    "Write a friendly email newsletter edition about this episode: a subject line as the title, a short intro, 3-4 key takeaways as bullets, and a call-to-action to listen. Markdown body.",
};
// Shape the model's JSON reply must satisfy: a non-empty title and a non-empty body.
const outputSchema = z.object({ title: z.string().min(1), body: z.string().min(1) });
/** Validated repurposed content (title + body), inferred from outputSchema. */
export type RepurposedOutput = z.infer<typeof outputSchema>;
/**
 * Flatten a structured script into a plain transcript: each section becomes a
 * "## title" heading followed by its turns one per line, with a blank line
 * between sections. Speaker keys are not included.
 */
function scriptToText(script: StructuredScript): string {
  const blocks: string[] = [];
  for (const section of script.sections) {
    const lines = section.turns.map((turn) => turn.text).join("\n");
    blocks.push(`## ${section.title}\n` + lines);
  }
  return blocks.join("\n\n");
}
/**
 * Ask the script model to rewrite an episode as a blog post, social thread or
 * newsletter.
 *
 * Only the first 9,000 characters of the flattened transcript are sent. The
 * reply must be JSON matching outputSchema.
 *
 * @returns The validated title/body plus the token usage reported by the API
 *   (zeros when the API reports none).
 * @throws When the reply is not valid JSON or fails schema validation.
 */
export async function repurposeScript(
  script: StructuredScript,
  format: RepurposeFormat
): Promise<{ content: RepurposedOutput; usage: TokenUsage }> {
  const transcript = scriptToText(script).slice(0, 9000);
  const systemPrompt =
    "You are a content marketer who repurposes podcast episodes into other formats. Return STRICT JSON: { \"title\": string, \"body\": string } where body is markdown.";
  const userPrompt = `${FORMAT_PROMPTS[format]}\n\nEpisode title: ${script.title}\n\nTranscript:\n${transcript}`;

  const completion = await openai().chat.completions.create({
    model: SCRIPT_MODEL,
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: userPrompt },
    ],
    response_format: { type: "json_object" },
    temperature: 0.7,
  });

  // A missing reply is treated as "{}", which then fails schema validation.
  const raw = completion.choices[0]?.message?.content ?? "{}";
  const content = outputSchema.parse(JSON.parse(raw));
  const usage: TokenUsage = {
    inputTokens: completion.usage?.prompt_tokens ?? 0,
    outputTokens: completion.usage?.completion_tokens ?? 0,
  };
  return { content, usage };
}
+110
View File
@@ -0,0 +1,110 @@
import type { DialogueTurn, ScriptSection, StructuredScript } from "../types";
/** A group of consecutive turns, as built by segmentTurns. */
export interface AudioSegment {
turns: DialogueTurn[];
/** Sum of the text lengths of `turns`. */
characters: number;
/** Distinct voices used in this segment (drives speech vs dialogue choice). */
uniqueVoices: number;
}
/**
 * Flatten a script's sections into one ordered list of voiced turns.
 *
 * Text is trimmed and turns that end up empty are dropped. A speaker key with
 * no entry in `voiceMap` gets `fallbackVoiceId`.
 */
export function flattenTurns(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string
): DialogueTurn[] {
  return script.sections.flatMap((section) =>
    section.turns.flatMap((turn) => {
      const text = turn.text.trim();
      if (!text) return [];
      return [{ text, voiceId: voiceMap[turn.speakerKey] ?? fallbackVoiceId }];
    })
  );
}
/**
 * Split text longer than `maxChars` into pieces of at most `maxChars`.
 *
 * Pieces are built from whole sentences where possible. A single sentence that
 * is itself too long is hard-wrapped, breaking at whitespace when there is any
 * within the limit so words are not cut in half; only an unbroken run longer
 * than `maxChars` is sliced mid-word.
 *
 * @param text Text to split; returned unchanged as a one-element array when it
 *   already fits.
 * @param maxChars Maximum length of each piece.
 * @returns Trimmed, non-empty pieces in original order.
 * @throws RangeError when splitting is needed but `maxChars` is below 1
 *   (previously this looped forever).
 */
export function splitLongText(text: string, maxChars: number): string[] {
  if (text.length <= maxChars) return [text];
  if (maxChars < 1) {
    throw new RangeError(`maxChars must be at least 1 (received ${maxChars})`);
  }

  const sentences = text.match(/[^.!?]+[.!?]*\s*/g) ?? [text];
  const parts: string[] = [];
  let current = "";

  for (const sentence of sentences) {
    if (sentence.length > maxChars) {
      // A single very long sentence: flush what we have, then hard-wrap it.
      if (current) {
        parts.push(current.trim());
        current = "";
      }
      parts.push(...hardWrapOnWhitespace(sentence, maxChars));
      continue;
    }
    if ((current + sentence).length > maxChars) {
      parts.push(current.trim());
      current = sentence;
    } else {
      current += sentence;
    }
  }

  if (current.trim()) parts.push(current.trim());
  return parts.filter(Boolean);
}

/** Break one over-long sentence into pieces of at most `maxChars`, preferring whitespace breaks. */
function hardWrapOnWhitespace(sentence: string, maxChars: number): string[] {
  const whitespace = /\s/;
  const pieces: string[] = [];
  let rest = sentence.trim();

  while (rest.length > maxChars) {
    // Walk back from the limit to the nearest whitespace. `rest` is trimmed, so
    // index 0 is never whitespace and a break found here leaves a non-empty piece.
    let cut = maxChars;
    while (cut > 0 && !whitespace.test(rest.charAt(cut))) cut--;
    if (cut === 0) cut = maxChars; // no whitespace in range: slice mid-word

    pieces.push(rest.slice(0, cut).trimEnd());
    rest = rest.slice(cut).trimStart();
  }

  if (rest) pieces.push(rest);
  return pieces;
}
/**
 * Pack dialogue turns, in order, into segments whose total text length stays
 * within `maxChars`.
 *
 * Any turn longer than the limit is first split into several turns that keep
 * its voice. A segment is closed as soon as the next turn would push it over
 * the limit. Each segment is later sent to ElevenLabs as one request, then all
 * segment MP3s are stitched together.
 */
export function segmentTurns(turns: DialogueTurn[], maxChars: number): AudioSegment[] {
  // Expand oversized turns into sub-turns so every piece fits on its own.
  const pieces: DialogueTurn[] = turns.flatMap((turn) =>
    splitLongText(turn.text, maxChars).map((text) => ({ text, voiceId: turn.voiceId }))
  );

  const segments: AudioSegment[] = [];
  let pending: DialogueTurn[] = [];
  let pendingChars = 0;

  const emit = (): void => {
    if (pending.length === 0) return;
    const voices = new Set(pending.map((t) => t.voiceId));
    segments.push({ turns: pending, characters: pendingChars, uniqueVoices: voices.size });
    pending = [];
    pendingChars = 0;
  };

  for (const piece of pieces) {
    const wouldOverflow = pendingChars + piece.text.length > maxChars;
    if (wouldOverflow && pending.length > 0) emit();
    pending.push(piece);
    pendingChars += piece.text.length;
  }
  emit();

  return segments;
}
/** Convenience: flatten a full script into voiced turns, then group them into audio segments. */
export function segmentScript(
  script: StructuredScript,
  voiceMap: Record<string, string>,
  fallbackVoiceId: string,
  maxChars: number
): AudioSegment[] {
  const voicedTurns = flattenTurns(script, voiceMap, fallbackVoiceId);
  return segmentTurns(voicedTurns, maxChars);
}
/**
 * Total characters across a script (for cost/limit estimation).
 * Counts the raw text length of every turn in every section.
 */
export function totalCharacters(sections: ScriptSection[]): number {
  let total = 0;
  for (const section of sections) {
    for (const turn of section.turns) {
      total += turn.text.length;
    }
  }
  return total;
}
+60
View File
@@ -0,0 +1,60 @@
import { promises as fs } from "node:fs";
import os from "node:os";
import path from "node:path";
import { runFfmpeg, ffprobeDuration } from "../ffmpeg";
/**
 * Concatenate per-segment MP3 buffers into one normalized episode MP3.
 *
 * Segments are re-encoded (not stream-copied) through a single libmp3lame pass
 * with loudness normalization, which guarantees a uniform codec/bitrate and
 * avoids the header/timestamp glitches that `-c copy` concat can produce.
 *
 * All work happens in a fresh temp directory that is removed afterwards,
 * whether or not stitching succeeded.
 *
 * @param segments MP3 clips in playback order.
 * @returns The encoded file and its probed duration (null when probing fails).
 * @throws Error when `segments` is empty; ffmpeg failures propagate.
 */
export async function stitchMp3(
  segments: Buffer[]
): Promise<{ data: Buffer; durationSec: number | null }> {
  if (segments.length === 0) throw new Error("No audio segments to stitch");

  const workDir = await fs.mkdtemp(path.join(os.tmpdir(), "podcastyes-"));
  try {
    const segmentPaths: string[] = [];
    for (const [index, clip] of segments.entries()) {
      const segmentPath = path.join(workDir, `seg_${String(index).padStart(4, "0")}.mp3`);
      await fs.writeFile(segmentPath, clip);
      segmentPaths.push(segmentPath);
    }

    // concat demuxer list — forward slashes so it parses on Windows and Linux;
    // single quotes inside a path are escaped for the list syntax.
    const toListLine = (filePath: string): string => {
      const portable = filePath.split(path.sep).join("/");
      const escaped = portable.replace(/'/g, "'\\''");
      return `file '${escaped}'`;
    };
    const listPath = path.join(workDir, "list.txt");
    await fs.writeFile(listPath, segmentPaths.map(toListLine).join("\n"));

    const outPath = path.join(workDir, "episode.mp3");
    const ffmpegArgs = [
      "-y",
      "-f",
      "concat",
      "-safe",
      "0",
      "-i",
      listPath,
      "-af",
      "loudnorm=I=-16:TP=-1.5:LRA=11",
      "-c:a",
      "libmp3lame",
      "-b:a",
      "128k",
      "-ar",
      "44100",
      outPath,
    ];
    await runFfmpeg(ffmpegArgs);

    const data = await fs.readFile(outPath);
    const durationSec = await ffprobeDuration(outPath);
    return { data, durationSec };
  } finally {
    // Cleanup is best-effort; a failure here must not mask the real result.
    await fs.rm(workDir, { recursive: true, force: true }).catch(() => {});
  }
}
+93
View File
@@ -0,0 +1,93 @@
import type { EpisodeConfig, StructuredScript } from "../types";
// One sentence of direction per episode format, inserted into the "Format:" line
// of the script-generation prompt.
const FORMAT_GUIDANCE: Record<EpisodeConfig["format"], string> = {
SOLO: "A single host speaking directly to the listener. Use only the host speaker.",
INTERVIEW:
"A host interviewing a guest. Alternate naturally between host questions and guest answers.",
MULTI_HOST:
"A panel of co-hosts in lively conversation. Distribute lines across all speakers and let them react to each other.",
};
/**
 * Target word count for an episode of the given length, assuming roughly
 * 150 spoken words per minute. Rounded to the nearest whole word.
 */
function wordBudget(minutes: number): number {
  const wordsPerMinute = 150;
  return Math.round(minutes * wordsPerMinute);
}
/**
 * Build the system + user chat messages that ask the model for a full episode
 * script as JSON.
 *
 * The user message lists the topic, tone, format (with its guidance), optional
 * audience, target length with a word budget, the allowed speaker keys, and the
 * exact JSON shape expected back.
 *
 * Fixes relative to the earlier prompt text: the section count reads "3-6"
 * (it had collapsed to "36"), the format name is separated from its guidance,
 * and the blank separator lines are kept — only the audience line is omitted
 * when no audience is configured.
 */
export function buildScriptMessages(config: EpisodeConfig) {
  const speakerList = config.speakers
    .map((s) => `- key "${s.speakerKey}" = ${s.displayName}`)
    .join("\n");

  const system = [
    "You are an expert podcast scriptwriter and showrunner.",
    "You write natural, engaging, spoken-word scripts that sound great when read aloud by AI voices.",
    "Avoid stage directions, sound-effect notes, and parentheticals — output only spoken dialogue.",
    "Return STRICT JSON only, matching the requested schema. Do not include markdown fences.",
  ].join(" ");

  // Spread an empty array for a missing audience instead of filtering out every
  // empty string, which would also delete the intentional blank lines.
  const audienceLine = config.audience ? [`Target audience: ${config.audience}`] : [];

  const user = [
    `Write a complete podcast episode script in ${config.language}.`,
    "",
    `Topic: ${config.topic}`,
    `Tone: ${config.tone}`,
    `Format: ${config.format} — ${FORMAT_GUIDANCE[config.format]}`,
    ...audienceLine,
    `Approximate length: ${config.targetLengthMin} minutes (~${wordBudget(
      config.targetLengthMin
    )} words total).`,
    "",
    "Speakers (use ONLY these keys in `speakerKey`):",
    speakerList,
    "",
    "Structure the episode into 3-6 sections (e.g. intro, main segments, outro).",
    "Each section has a short title and a list of turns. Each turn is one speaker's spoken line.",
    "",
    "Return JSON with this exact shape:",
    `{
"title": "string — a catchy episode title",
"sections": [
{
"id": "kebab-case-id",
"title": "string",
"turns": [
{ "speakerKey": "host", "text": "spoken line..." }
]
}
]
}`,
  ].join("\n");

  return [
    { role: "system" as const, content: system },
    { role: "user" as const, content: user },
  ];
}
/**
 * Build the system + user chat messages that ask the model to rewrite one
 * section of an existing script.
 *
 * The section is looked up by id to quote its current title; when no section
 * has that id, the id itself is used in place of the title.
 */
export function buildSectionMessages(
  config: EpisodeConfig,
  script: StructuredScript,
  sectionId: string
) {
  const target = script.sections.find((candidate) => candidate.id === sectionId);
  const sectionTitle = target?.title ?? sectionId;
  const speakerList = config.speakers
    .map(({ speakerKey, displayName }) => `"${speakerKey}"=${displayName}`)
    .join(", ");

  const system =
    "You are an expert podcast scriptwriter. Rewrite a single section of an existing episode, keeping the same speakers, tone, and language. Return STRICT JSON for just that one section.";

  const userLines = [
    `Episode title: ${script.title}`,
    `Tone: ${config.tone}. Language: ${config.language}. Speakers: ${speakerList}.`,
    "",
    `Rewrite the section titled "${sectionTitle}" (id "${sectionId}") to be fresh and engaging while serving the same purpose in the episode.`,
    "",
    "Return JSON with this exact shape:",
    `{ "id": "${sectionId}", "title": "string", "turns": [ { "speakerKey": "host", "text": "..." } ] }`,
  ];

  return [
    { role: "system" as const, content: system },
    { role: "user" as const, content: userLines.join("\n") },
  ];
}
+97
View File
@@ -0,0 +1,97 @@
import type { AudioProvider, DialogueTurn, Voice } from "../types";
// Base URL for every ElevenLabs request made in this file.
const API = "https://api.elevenlabs.io/v1";
// Model ids for single-voice speech and multi-voice dialogue; each can be
// overridden through its environment variable.
const TTS_MODEL = process.env.ELEVENLABS_TTS_MODEL ?? "eleven_multilingual_v2";
const DIALOGUE_MODEL = process.env.ELEVENLABS_DIALOGUE_MODEL ?? "eleven_v3";
// Sent as the `output_format` query parameter on both synthesis endpoints.
const OUTPUT_FORMAT = "mp3_44100_128";
/**
 * Read the ElevenLabs API key from the environment on every call.
 *
 * @throws Error when ELEVENLABS_API_KEY is unset or empty.
 */
function apiKey(): string {
  const key = process.env.ELEVENLABS_API_KEY;
  if (key) return key;
  throw new Error("ELEVENLABS_API_KEY is not set");
}
// Fields read from each entry of the `voices` array returned by the /voices endpoint.
interface ElevenVoice {
voice_id: string;
name: string;
preview_url?: string;
// listVoices reads the `gender`, `accent` and `description` keys from this map.
labels?: Record<string, string>;
}
/**
 * AudioProvider backed by the ElevenLabs HTTP API.
 *
 * Both synthesis methods return the MP3 bytes plus the number of characters
 * submitted. The `language` option is accepted for interface compatibility but
 * is not sent to the API.
 */
export class ElevenLabsAudioProvider implements AudioProvider {
  // Kept safely under the ~2,000-char dialogue limit to leave headroom.
  readonly maxCharsPerRequest = 1800;

  /**
   * Synthesize one voice reading `text`.
   *
   * @throws Error with the HTTP status and response body on a non-2xx reply.
   */
  async synthesizeSpeech(
    text: string,
    voiceId: string,
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    // The voice id is a path segment, so escape it rather than trusting its contents.
    const audio = await this.postForAudio(
      `/text-to-speech/${encodeURIComponent(voiceId)}`,
      {
        text,
        model_id: TTS_MODEL,
        voice_settings: { stability: 0.5, similarity_boost: 0.75 },
      },
      "TTS"
    );
    return { audio, characters: text.length };
  }

  /**
   * Synthesize a multi-voice exchange in a single request.
   *
   * @throws Error with the HTTP status and response body on a non-2xx reply.
   */
  async synthesizeDialogue(
    turns: DialogueTurn[],
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    const audio = await this.postForAudio(
      "/text-to-dialogue",
      {
        inputs: turns.map((t) => ({ text: t.text, voice_id: t.voiceId })),
        model_id: DIALOGUE_MODEL,
      },
      "dialogue"
    );
    const characters = turns.reduce((n, t) => n + t.text.length, 0);
    return { audio, characters };
  }

  /**
   * Fetch the account's voice catalog and map it to the provider-neutral Voice shape.
   *
   * @throws Error with the HTTP status and response body on a non-2xx reply.
   */
  async listVoices(): Promise<Voice[]> {
    const res = await fetch(`${API}/voices`, { headers: { "xi-api-key": apiKey() } });
    // Include the response body, matching the two synthesis calls.
    if (!res.ok) throw new Error(`ElevenLabs voices ${res.status}: ${await safeText(res)}`);
    const data = (await res.json()) as { voices?: ElevenVoice[] };
    return (data.voices ?? []).map((v) => ({
      id: v.voice_id,
      name: v.name,
      gender: normalizeGender(v.labels?.gender),
      accent: v.labels?.accent,
      description: v.labels?.description,
      previewUrl: v.preview_url,
    }));
  }

  /** POST a JSON body to a synthesis endpoint and return the audio bytes. */
  private async postForAudio(path: string, body: unknown, label: string): Promise<Buffer> {
    const res = await fetch(`${API}${path}?output_format=${OUTPUT_FORMAT}`, {
      method: "POST",
      headers: {
        "xi-api-key": apiKey(),
        "Content-Type": "application/json",
        accept: "audio/mpeg",
      },
      body: JSON.stringify(body),
    });
    if (!res.ok) throw new Error(`ElevenLabs ${label} ${res.status}: ${await safeText(res)}`);
    return Buffer.from(await res.arrayBuffer());
  }
}
/** Map a free-form gender label to the Voice union; anything other than "male"/"female" becomes "neutral". */
function normalizeGender(g?: string): Voice["gender"] {
  switch (g) {
    case "male":
    case "female":
      return g;
    default:
      return "neutral";
  }
}
/**
 * Read a response body for use in an error message.
 * Falls back to the HTTP status text if the body cannot be read.
 */
async function safeText(res: Response): Promise<string> {
  let detail: string;
  try {
    detail = await res.text();
  } catch {
    detail = res.statusText;
  }
  return detail;
}
+21
View File
@@ -0,0 +1,21 @@
import { OpenAIScriptProvider } from "./openai-script";
import { ElevenLabsAudioProvider } from "./elevenlabs-audio";
import { OpenAIArtProvider } from "./openai-art";
import type { ArtProvider, AudioProvider, ScriptProvider } from "../types";
// Registry of active providers. Swapping a model later = change one line here.
// Each provider is created on first request and reused afterwards.
let script: ScriptProvider | null = null;
let audio: AudioProvider | null = null;
let art: ArtProvider | null = null;

/** Shared script provider instance. */
export function scriptProvider(): ScriptProvider {
  if (script === null) {
    script = new OpenAIScriptProvider();
  }
  return script;
}

/** Shared audio provider instance. */
export function audioProvider(): AudioProvider {
  if (audio === null) {
    audio = new ElevenLabsAudioProvider();
  }
  return audio;
}

/** Shared cover-art provider instance. */
export function artProvider(): ArtProvider {
  if (art === null) {
    art = new OpenAIArtProvider();
  }
  return art;
}
+36
View File
@@ -0,0 +1,36 @@
import { openai, ART_MODEL } from "../openai";
import type { ArtProvider } from "../types";
/** ArtProvider that generates cover images through the OpenAI images API. */
export class OpenAIArtProvider implements ArtProvider {
  readonly model = ART_MODEL;

  /**
   * Generate one cover image.
   *
   * @param prompt Image prompt.
   * @param opts Optional size; defaults to 1024x1024.
   * @returns The decoded image bytes and the revised prompt, if the API returned one.
   * @throws Error when the response contains no base64 image data.
   */
  async generateCover(
    prompt: string,
    opts?: { size?: "1024x1024" }
  ): Promise<{ data: Buffer; revisedPrompt?: string }> {
    const size = opts?.size ?? "1024x1024";
    const response = await openai().images.generate({
      model: this.model,
      prompt,
      n: 1,
      size,
      response_format: "b64_json",
    });

    const first = response.data?.[0];
    if (!first?.b64_json) throw new Error("DALL·E returned no image data");

    const data = Buffer.from(first.b64_json, "base64");
    return { data, revisedPrompt: first.revised_prompt };
  }
}
/**
 * Build a cover-art prompt for an episode.
 *
 * @param topic Episode topic; also used as the quoted title when no usable title is given.
 * @param tone Mood/tone descriptor.
 * @param title Episode title. A missing, empty or whitespace-only title falls
 *   back to the topic, so the prompt never reads `titled ""`.
 * @returns A single-line prompt that asks for a text-free square cover.
 */
export function buildCoverPrompt(topic: string, tone: string, title?: string): string {
  // `||` rather than `??`: a blank title must fall back too, not just undefined.
  const heading = title?.trim() || topic;
  return [
    `Podcast cover art for an episode titled "${heading}".`,
    `Topic: ${topic}. Mood/tone: ${tone}.`,
    "Modern, bold, eye-catching square album-cover style.",
    "Strong focal subject, clean composition, vibrant but tasteful colors.",
    "No text, no words, no letters, no logos.",
  ].join(" ");
}
+90
View File
@@ -0,0 +1,90 @@
import { z } from "zod";
import { openai, SCRIPT_MODEL } from "../openai";
import { buildScriptMessages, buildSectionMessages } from "../prompts/script";
import type {
EpisodeConfig,
ScriptProvider,
ScriptSection,
StructuredScript,
TokenUsage,
} from "../types";
// Validation schemas for the model's JSON output. Every string must be
// non-empty and every array must contain at least one element.
const turnSchema = z.object({
speakerKey: z.string().min(1),
text: z.string().min(1),
});
// One section; also used on its own to validate a regenerated section.
const sectionSchema = z.object({
id: z.string().min(1),
title: z.string().min(1),
turns: z.array(turnSchema).min(1),
});
// A whole episode script.
const scriptSchema = z.object({
title: z.string().min(1),
sections: z.array(sectionSchema).min(1),
});
/**
 * Repair speaker keys the model may have invented.
 *
 * Any turn whose `speakerKey` is not one of the configured keys is reassigned
 * to the first configured speaker (or "host" when none are configured). The
 * input script is not mutated; a copy is returned.
 */
function normalizeSpeakers(script: StructuredScript, config: EpisodeConfig): StructuredScript {
  const allowedKeys = new Set(config.speakers.map((speaker) => speaker.speakerKey));
  const defaultKey = config.speakers[0]?.speakerKey ?? "host";

  const sections = script.sections.map((section) => {
    const turns = section.turns.map((turn) => {
      const speakerKey = allowedKeys.has(turn.speakerKey) ? turn.speakerKey : defaultKey;
      return { ...turn, speakerKey };
    });
    return { ...section, turns };
  });

  return { ...script, sections };
}
/** Convert the API's usage object to TokenUsage, treating a missing object or field as 0. */
function usageFrom(u: { prompt_tokens?: number; completion_tokens?: number } | undefined): TokenUsage {
  const inputTokens = u?.prompt_tokens ?? 0;
  const outputTokens = u?.completion_tokens ?? 0;
  return { inputTokens, outputTokens };
}
/**
 * ScriptProvider backed by OpenAI chat completions in JSON mode.
 *
 * Replies are parsed as JSON and validated against the zod schemas above; a
 * reply that is not valid JSON or does not match the schema throws.
 */
export class OpenAIScriptProvider implements ScriptProvider {
  readonly model = SCRIPT_MODEL;

  /** Generate a full script, with unknown speaker keys coerced to the configured set. */
  async generate(config: EpisodeConfig): Promise<{ script: StructuredScript; usage: TokenUsage }> {
    const completion = await openai().chat.completions.create({
      model: this.model,
      messages: buildScriptMessages(config),
      response_format: { type: "json_object" },
      temperature: 0.8,
    });

    // A missing reply is treated as "{}", which then fails schema validation.
    const raw = completion.choices[0]?.message?.content ?? "{}";
    const validated = scriptSchema.parse(JSON.parse(raw));
    const script = normalizeSpeakers(validated, config);
    return { script, usage: usageFrom(completion.usage) };
  }

  /**
   * Rewrite a single section.
   *
   * The returned section always carries the requested `sectionId`, whatever id
   * the model echoed back, and unknown speaker keys are coerced to the first
   * configured speaker (or "host").
   */
  async regenerateSection(
    config: EpisodeConfig,
    script: StructuredScript,
    sectionId: string
  ): Promise<{ section: ScriptSection; usage: TokenUsage }> {
    const completion = await openai().chat.completions.create({
      model: this.model,
      messages: buildSectionMessages(config, script, sectionId),
      response_format: { type: "json_object" },
      temperature: 0.9,
    });

    const raw = completion.choices[0]?.message?.content ?? "{}";
    const validated = sectionSchema.parse(JSON.parse(raw));

    const allowedKeys = new Set(config.speakers.map((speaker) => speaker.speakerKey));
    const defaultKey = config.speakers[0]?.speakerKey ?? "host";
    const turns = validated.turns.map((turn) => {
      const speakerKey = allowedKeys.has(turn.speakerKey) ? turn.speakerKey : defaultKey;
      return { ...turn, speakerKey };
    });

    const section = { ...validated, id: sectionId, turns };
    return { section, usage: usageFrom(completion.usage) };
  }
}
+48
View File
@@ -0,0 +1,48 @@
import { z } from "zod";
import { openai, SCRIPT_MODEL } from "./openai";
import type { TokenUsage } from "./types";
// Shape the model's season plan must satisfy: non-empty title and description,
// and at least one episode with a non-empty title, topic and summary.
const seasonSchema = z.object({
title: z.string().min(1),
description: z.string().min(1),
episodes: z
.array(z.object({ title: z.string().min(1), topic: z.string().min(1), summary: z.string().min(1) }))
.min(1),
});
/** Validated season plan, inferred from seasonSchema. */
export type SeasonPlan = z.infer<typeof seasonSchema>;
/**
 * Ask the script model to outline a podcast season.
 *
 * @param input Theme, requested episode count, tone, optional audience and language.
 * @returns The validated plan plus the token usage reported by the API (zeros
 *   when the API reports none). The number of episodes returned is not checked
 *   against `input.count`.
 * @throws When the reply is not valid JSON or fails schema validation.
 */
export async function planSeason(input: {
  theme: string;
  count: number;
  tone: string;
  audience?: string;
  language: string;
}): Promise<{ plan: SeasonPlan; usage: TokenUsage }> {
  const { theme, count, tone, audience, language } = input;

  const systemPrompt =
    "You are a podcast showrunner planning a cohesive season. Return STRICT JSON: { \"title\": string, \"description\": string, \"episodes\": [{ \"title\": string, \"topic\": string, \"summary\": string }] }.";
  const audienceClause = audience ? `Audience: ${audience}.` : "";
  const userPrompt = [
    `Plan a ${count}-episode podcast season about: ${theme}.`,
    `Tone: ${tone}. ${audienceClause} Language: ${language}.`,
    `Give the season a title and short description, then ${count} episodes, each with a catchy title, a specific topic to cover, and a one-sentence summary.`,
  ].join("\n");

  const completion = await openai().chat.completions.create({
    model: SCRIPT_MODEL,
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: userPrompt },
    ],
    response_format: { type: "json_object" },
    temperature: 0.85,
  });

  // A missing reply is treated as "{}", which then fails schema validation.
  const raw = completion.choices[0]?.message?.content ?? "{}";
  const plan = seasonSchema.parse(JSON.parse(raw));
  const usage: TokenUsage = {
    inputTokens: completion.usage?.prompt_tokens ?? 0,
    outputTokens: completion.usage?.completion_tokens ?? 0,
  };
  return { plan, usage };
}
+103
View File
@@ -0,0 +1,103 @@
/**
* Provider abstraction for the three AI capabilities. Each capability has a thin
* interface so the underlying model (GPT-4, ElevenLabs, DALL·E) can be swapped
* via the registry in providers/index.ts without touching call sites.
*/
/** The supported episode layouts. */
export type EpisodeFormat = "SOLO" | "INTERVIEW" | "MULTI_HOST";
/** A speaker taking part in an episode. */
export interface SpeakerRole {
/** Stable key referenced by script turns, e.g. "host", "guest", "cohost". */
speakerKey: string;
/** Human-readable name of the speaker. */
displayName: string;
}
/** Everything a script provider is given to write an episode. */
export interface EpisodeConfig {
title?: string;
topic: string;
tone: string;
format: EpisodeFormat;
/** ISO language code, e.g. "en", "es". */
language: string;
/** Target running time in minutes. */
targetLengthMin: number;
audience?: string;
speakers: SpeakerRole[];
}
// ─────────────── Script ───────────────
/** One spoken line, attributed to a speaker by key. */
export interface ScriptTurn {
speakerKey: string;
text: string;
}
/** A titled, identifiable group of turns within a script. */
export interface ScriptSection {
id: string;
title: string;
turns: ScriptTurn[];
}
/** A complete episode script: a title plus ordered sections. */
export interface StructuredScript {
title: string;
sections: ScriptSection[];
}
/** Token counts for one model request. */
export interface TokenUsage {
inputTokens: number;
outputTokens: number;
}
/** Capability interface for script generation. */
export interface ScriptProvider {
/** Identifier of the underlying model. */
readonly model: string;
/** Write a full script for the given configuration. */
generate(config: EpisodeConfig): Promise<{ script: StructuredScript; usage: TokenUsage }>;
/** Rewrite the section with id `sectionId` of an existing script. */
regenerateSection(
config: EpisodeConfig,
script: StructuredScript,
sectionId: string
): Promise<{ section: ScriptSection; usage: TokenUsage }>;
}
// ─────────────── Audio ───────────────
/** Provider-neutral description of a synthesis voice. */
export interface Voice {
/** Provider voice identifier. */
id: string;
name: string;
gender?: "male" | "female" | "neutral";
accent?: string;
description?: string;
/** URL of a sample clip, when the provider supplies one. */
previewUrl?: string;
}
/** One line of multi-voice dialogue: the text to speak and the voice that speaks it. */
export interface DialogueTurn {
text: string;
voiceId: string;
}
/** Capability interface for speech synthesis. Both synthesis methods return the audio plus a character count. */
export interface AudioProvider {
/** Synthesize a single voice reading (used for SOLO and as a fallback). */
synthesizeSpeech(
text: string,
voiceId: string,
opts?: { language?: string }
): Promise<{ audio: Buffer; characters: number }>;
/** Synthesize a multi-voice dialogue chunk (≤ provider char limit, ≤10 voices). */
synthesizeDialogue(
turns: DialogueTurn[],
opts?: { language?: string }
): Promise<{ audio: Buffer; characters: number }>;
/** Live voice catalog for the account. */
listVoices(): Promise<Voice[]>;
/** Hard cap on characters per synthesis request (drives segmentation). */
readonly maxCharsPerRequest: number;
}
// ─────────────── Art ───────────────
/** Capability interface for cover-art generation. */
export interface ArtProvider {
/** Identifier of the underlying model. */
readonly model: string;
/** Generate one cover image; `revisedPrompt` is present when the provider returns one. */
generateCover(
prompt: string,
opts?: { size?: "1024x1024" }
): Promise<{ data: Buffer; revisedPrompt?: string }>;
}
+36
View File
@@ -0,0 +1,36 @@
import type { Voice } from "./types";
/**
 * Curated catalog of ElevenLabs premade voices (stable public voice IDs available
 * to all accounts). Used by the create-episode wizard so it can render the voice
 * picker without a live API call. The provider's listVoices() returns the live
 * account catalog when needed.
 *
 * Entries list the eight female voices first, then the eight male voices; none
 * sets `previewUrl`.
 */
export const VOICE_CATALOG: Voice[] = [
{ id: "21m00Tcm4TlvDq8ikWAM", name: "Rachel", gender: "female", accent: "American", description: "Calm, narrational" },
{ id: "EXAVITQu4vr4xnSDxMaL", name: "Sarah", gender: "female", accent: "American", description: "Soft, news" },
{ id: "FGY2WhTYpPnrIDTdsKH5", name: "Laura", gender: "female", accent: "American", description: "Upbeat, social" },
{ id: "XB0fDUnXU5powFXDhCwa", name: "Charlotte", gender: "female", accent: "British", description: "Warm, seductive" },
{ id: "XrExE9yKIg1WjnnlVkGX", name: "Matilda", gender: "female", accent: "American", description: "Friendly, warm" },
{ id: "pFZP5JQG7iQjIQuC4Bku", name: "Lily", gender: "female", accent: "British", description: "Confident narration" },
{ id: "cgSgspJ2msm6clMCkdW9", name: "Jessica", gender: "female", accent: "American", description: "Expressive, young" },
{ id: "9BWtsMINqrJLrRacOk9x", name: "Aria", gender: "female", accent: "American", description: "Husky, expressive" },
{ id: "pNInz6obpgDQGcFmaJgB", name: "Adam", gender: "male", accent: "American", description: "Deep, narration" },
{ id: "JBFqnCBsd6RMkjVDRZzb", name: "George", gender: "male", accent: "British", description: "Warm, mature" },
{ id: "TX3LPaxmHKxFdv7VOQHJ", name: "Liam", gender: "male", accent: "American", description: "Articulate, young" },
{ id: "onwK4e9ZLuTAKqWW03F9", name: "Daniel", gender: "male", accent: "British", description: "Authoritative, news" },
{ id: "nPczCjzI2devNBz1zQrb", name: "Brian", gender: "male", accent: "American", description: "Deep, mature" },
{ id: "iP95p4xoKVk53GoZ742B", name: "Chris", gender: "male", accent: "American", description: "Casual, conversational" },
{ id: "bIHbv24MWmeRgasZH58o", name: "Will", gender: "male", accent: "American", description: "Friendly, chill" },
{ id: "cjVigY5qzO86Huf0OWal", name: "Eric", gender: "male", accent: "American", description: "Smooth, classy" },
];
// Default voice id per speaker key. Each id also appears in VOICE_CATALOG under
// the name given in the trailing comment.
export const DEFAULT_VOICE_IDS: Record<string, string> = {
host: "21m00Tcm4TlvDq8ikWAM", // Rachel
guest: "pNInz6obpgDQGcFmaJgB", // Adam
cohost: "JBFqnCBsd6RMkjVDRZzb", // George
};
/** Look up a catalog voice by its id; returns undefined when the id is not in VOICE_CATALOG. */
export function voiceById(id: string): Voice | undefined {
  for (const voice of VOICE_CATALOG) {
    if (voice.id === id) return voice;
  }
  return undefined;
}