Initial commit: PodcastYes — AI podcast platform

2026-06-07 03:58:32 -04:00
commit 155507f21a
151 changed files with 19826 additions and 0 deletions
@@ -0,0 +1,228 @@
+import { Prisma } from "@prisma/client";
+import { prisma } from "@/lib/db";
+import { setEpisodeStatus } from "@/lib/episodes/status";
+import { scriptProvider, audioProvider, artProvider } from "@/lib/ai/providers";
+import { buildCoverPrompt } from "@/lib/ai/providers/openai-art";
+import { segmentScript } from "./segment";
+import { stitchMp3 } from "./stitch";
+import { storage, assetKey } from "@/lib/storage";
+import { recordCost, scriptCostUsd, audioCostUsd, artCostUsd } from "@/lib/ai/cost";
+import { incrementUsage } from "@/lib/usage/meter";
+import { sendEmail, emailLayout } from "@/lib/email";
+import { DEFAULT_VOICE_IDS } from "@/lib/ai/voices";
+import type { EpisodeConfig, StructuredScript } from "@/lib/ai/types";
+import type { GenerationType } from "@/lib/queue/jobs";
+
+/** An episode row loaded together with its `speakers` and `user` relations. */
+type EpisodeWithRelations = Prisma.EpisodeGetPayload<{
+  include: { speakers: true; user: true };
+}>;
+
+/**
+ * Entry point of the episode generation pipeline, run by the worker.
+ * Stages: script → segment → synthesize → stitch → art → save → meter.
+ *
+ * `type` picks the stages that execute:
+ *  - "full"   → script, audio and art
+ *  - "script" → script, then audio (the audio is re-recorded from the new script)
+ *  - "audio"  → audio only
+ *  - "art"    → cover art only
+ *
+ * After the selected stages finish, usage is metered for exactly those stages,
+ * the episode is marked READY and the owner is notified by email.
+ *
+ * @param episodeId Id of the episode to generate.
+ * @param type Which part of the pipeline to run; defaults to "full".
+ * @throws If the episode does not exist or any stage rejects.
+ */
+export async function runEpisodeGeneration(
+  episodeId: string,
+  type: GenerationType = "full"
+): Promise<void> {
+  const episode = await loadEpisode(episodeId);
+  const config = toConfig(episode);
+
+  // Decide up front which stages this run covers.
+  const isFull = type === "full";
+  const runScript = isFull || type === "script";
+  const runAudio = runScript || type === "audio";
+  const runArt = isFull || type === "art";
+
+  if (runScript) await generateScript(episode, config);
+  if (runAudio) await generateAudio(episode);
+  if (runArt) await generateArt(episode);
+
+  await setEpisodeStatus(episodeId, "SAVING", { stage: "Finalizing your episode" });
+  // Only reached when every selected stage succeeded, so the flags equal "what ran".
+  await meter(episode, { script: runScript, audio: runAudio, art: runArt });
+  await setEpisodeStatus(episodeId, "READY", { stage: "Done" });
+  await notifyReady(episode);
+}
+
+/**
+ * Fetch an episode with its speakers and owning user.
+ *
+ * @param episodeId Primary key of the episode.
+ * @returns The episode including the `speakers` and `user` relations.
+ * @throws Error when no episode with that id exists.
+ */
+async function loadEpisode(episodeId: string): Promise<EpisodeWithRelations> {
+  const found = await prisma.episode.findUnique({
+    where: { id: episodeId },
+    include: { speakers: true, user: true },
+  });
+  if (found) return found;
+  throw new Error(`Episode ${episodeId} not found`);
+}
+
+/**
+ * Project a stored episode onto the config object handed to the script provider.
+ *
+ * Speakers are reduced to `{ speakerKey, displayName }`; when the episode has no
+ * speakers a single default "host"/"Host" speaker is substituted. A `null`
+ * audience is normalised to `undefined`.
+ */
+function toConfig(episode: EpisodeWithRelations): EpisodeConfig {
+  const { title, topic, tone, format, language, targetLengthMin } = episode;
+  const configured = episode.speakers.map(({ speakerKey, displayName }) => ({
+    speakerKey,
+    displayName,
+  }));
+  const defaultSpeakers = [{ speakerKey: "host", displayName: "Host" }];
+
+  return {
+    title,
+    topic,
+    tone,
+    format,
+    language,
+    targetLengthMin,
+    audience: episode.audience ?? undefined,
+    speakers: configured.length > 0 ? configured : defaultSpeakers,
+  };
+}
+
+// ─────────────── Stage 1: script ───────────────
+/**
+ * Generate the episode script, persist it and record its cost.
+ *
+ * The script row is created on first run; on later runs its content is replaced
+ * and `version` is incremented. When the episode has no (non-blank) title, the
+ * title produced by the script provider is saved and also written onto the
+ * in-memory `episode` so later stages see it.
+ *
+ * @param episode Episode being generated (its `title` may be mutated).
+ * @param config Generation config passed to the script provider.
+ */
+async function generateScript(episode: EpisodeWithRelations, config: EpisodeConfig) {
+  const episodeId = episode.id;
+  await setEpisodeStatus(episodeId, "SCRIPTING", { stage: "Writing the script" });
+
+  const generated = await scriptProvider().generate(config);
+  const { script, usage } = generated;
+  const content = script as unknown as Prisma.InputJsonValue;
+
+  await prisma.script.upsert({
+    where: { episodeId },
+    create: { episodeId, content, model: scriptProvider().model },
+    update: { content, version: { increment: 1 } },
+  });
+
+  // Fall back to the generated title only if the user left theirs empty.
+  const hasUserTitle = Boolean(episode.title?.trim());
+  if (!hasUserTitle && script.title) {
+    await prisma.episode.update({ where: { id: episodeId }, data: { title: script.title } });
+    episode.title = script.title;
+  }
+
+  const totalTokens = usage.inputTokens + usage.outputTokens;
+  await recordCost({
+    provider: "openai",
+    operation: "script",
+    units: totalTokens,
+    costUsd: scriptCostUsd(usage),
+    episodeId,
+    userId: episode.userId,
+  });
+}
+
+// ─────────────── Stages 2–4: segment → synthesize → stitch ───────────────
+/**
+ * Turn the stored script into a single MP3: segment it, synthesize each segment,
+ * stitch the results, upload the file and record the asset row and cost.
+ *
+ * Segments that use one voice go through `synthesizeSpeech` (turn texts joined
+ * with a space); segments with several voices go through `synthesizeDialogue`.
+ * Segments are synthesized one after another, in script order.
+ *
+ * @param episode Episode whose script should be voiced.
+ * @throws Error when no script row exists or the script yields no spoken lines.
+ */
+async function generateAudio(episode: EpisodeWithRelations) {
+  const episodeId = episode.id;
+  await setEpisodeStatus(episodeId, "SYNTHESIZING", { stage: "Recording the audio" });
+
+  const scriptRow = await prisma.script.findUnique({ where: { episodeId } });
+  if (!scriptRow) throw new Error("Cannot synthesize audio before a script exists");
+  const script = scriptRow.content as unknown as StructuredScript;
+
+  // speakerKey → voice id; speakers missing from the map use the first speaker's voice.
+  const voiceMap: Record<string, string> = {};
+  episode.speakers.forEach((speaker) => {
+    voiceMap[speaker.speakerKey] = speaker.elevenVoiceId;
+  });
+  const fallback = episode.speakers[0]?.elevenVoiceId ?? DEFAULT_VOICE_IDS.host;
+
+  const provider = audioProvider();
+  const segments = segmentScript(script, voiceMap, fallback, provider.maxCharsPerRequest);
+  if (segments.length === 0) throw new Error("Script produced no spoken lines");
+
+  const synthOptions = { language: episode.language };
+  const buffers: Buffer[] = [];
+  let totalChars = 0;
+  for (const seg of segments) {
+    let res;
+    if (seg.uniqueVoices <= 1) {
+      const joinedText = seg.turns.map((t) => t.text).join(" ");
+      res = await provider.synthesizeSpeech(joinedText, seg.turns[0].voiceId, {
+        language: synthOptions.language,
+      });
+    } else {
+      res = await provider.synthesizeDialogue(seg.turns, { language: synthOptions.language });
+    }
+    buffers.push(res.audio);
+    totalChars += res.characters;
+  }
+
+  await setEpisodeStatus(episodeId, "STITCHING", { stage: "Mixing the audio" });
+  const { data, durationSec } = await stitchMp3(buffers);
+
+  const key = assetKey("mp3", `${episodeId}.mp3`);
+  await storage().put(key, data, "audio/mpeg");
+
+  // Same values are written whether the asset row is new or being replaced.
+  const segmentSummary = () => ({ count: segments.length }) as Prisma.InputJsonValue;
+  await prisma.audioAsset.upsert({
+    where: { episodeId },
+    create: {
+      episodeId,
+      storageKey: key,
+      durationSec,
+      sizeBytes: data.length,
+      segments: segmentSummary(),
+    },
+    update: {
+      storageKey: key,
+      durationSec,
+      sizeBytes: data.length,
+      segments: segmentSummary(),
+    },
+  });
+
+  await recordCost({
+    provider: "elevenlabs",
+    operation: "audio",
+    units: totalChars,
+    costUsd: audioCostUsd(totalChars),
+    episodeId,
+    userId: episode.userId,
+  });
+}
+
+// ─────────────── Stage 5: cover art ───────────────
+/**
+ * Generate the cover image, upload it as a PNG and record the art row and cost.
+ *
+ * The stored prompt is the provider's revised prompt when one is returned,
+ * otherwise the prompt that was sent.
+ *
+ * @param episode Episode to illustrate; topic, tone and title feed the prompt.
+ */
+async function generateArt(episode: EpisodeWithRelations) {
+  const episodeId = episode.id;
+  await setEpisodeStatus(episodeId, "ART", { stage: "Designing the cover art" });
+
+  const prompt = buildCoverPrompt(episode.topic, episode.tone, episode.title);
+  const cover = await artProvider().generateCover(prompt);
+
+  const key = assetKey("art", `${episodeId}.png`);
+  await storage().put(key, cover.data, "image/png");
+
+  const storedPrompt = cover.revisedPrompt ?? prompt;
+  await prisma.coverArt.upsert({
+    where: { episodeId },
+    create: { episodeId, storageKey: key, prompt: storedPrompt, model: artProvider().model },
+    update: { storageKey: key, prompt: storedPrompt },
+  });
+
+  const imageCount = 1;
+  await recordCost({
+    provider: "openai",
+    operation: "art",
+    units: imageCount,
+    costUsd: artCostUsd(imageCount),
+    episodeId,
+    userId: episode.userId,
+  });
+}
+
+// ─────────────── Stage 7: meter ───────────────
+/**
+ * Increment usage counters for every stage that ran.
+ *
+ * Usage is attributed to the episode's organization when it has one,
+ * otherwise to the user who owns the episode.
+ *
+ * @param episode Episode that was generated.
+ * @param did Flags telling which stages ran in this job.
+ */
+async function meter(
+  episode: EpisodeWithRelations,
+  did: { script: boolean; audio: boolean; art: boolean }
+) {
+  const { organizationId, userId } = episode;
+  const ownerId = organizationId ?? userId;
+  const ownerType = organizationId ? "organization" : "user";
+
+  // Fixed order: script, audio, art.
+  const stages = ["script", "audio", "art"] as const;
+  for (const stage of stages) {
+    if (!did[stage]) continue;
+    await incrementUsage(ownerId, ownerType, stage);
+  }
+}
+
+/**
+ * Email the episode's owner that generation has finished.
+ *
+ * Best effort: a failure to send is logged and swallowed so it never fails the
+ * job. Links use `NEXT_PUBLIC_APP_URL`, falling back to `http://localhost:3000`.
+ */
+async function notifyReady(episode: EpisodeWithRelations) {
+  const appUrl = process.env.NEXT_PUBLIC_APP_URL ?? "http://localhost:3000";
+  try {
+    const html = emailLayout(
+      "Your episode is ready",
+      `“${episode.title}” has finished generating — script, audio, and cover art are all set.`,
+      { label: "Open episode", url: `${appUrl}/episodes/${episode.id}` }
+    );
+    const text = `Your episode "${episode.title}" is ready: ${appUrl}/episodes/${episode.id}`;
+    await sendEmail({
+      to: episode.user.email,
+      subject: `🎙️ "${episode.title}" is ready`,
+      html,
+      text,
+    });
+  } catch (err) {
+    console.error("[notifyReady] email failed (non-fatal)", err);
+  }
+}
@@ -0,0 +1,53 @@
+import { z } from "zod";
+import { openai, SCRIPT_MODEL } from "@/lib/ai/openai";
+import type { StructuredScript, TokenUsage } from "@/lib/ai/types";
+
+/** Target formats an episode script can be repurposed into. */
+export type RepurposeFormat = "blog" | "social_thread" | "newsletter";
+
+/** Instruction text per target format; it opens the user message sent to the model. */
+const FORMAT_PROMPTS: Record<RepurposeFormat, string> = {
+  blog: "Write an engaging, SEO-friendly blog post based on this episode. Include a compelling title and well-structured markdown body with headings and a short conclusion.",
+  social_thread:
+    "Write a punchy social thread (6–10 posts, numbered) summarizing the episode's best insights. Start with a strong hook. Put the whole thread in the markdown body.",
+  newsletter:
+    "Write a friendly email newsletter edition about this episode: a subject line as the title, a short intro, 3–4 key takeaways as bullets, and a call-to-action to listen. Markdown body.",
+};
+
+/** Schema the model's JSON reply is validated against: non-empty `title` and `body` strings. */
+const outputSchema = z.object({ title: z.string().min(1), body: z.string().min(1) });
+/** Validated repurposed content (`title` plus markdown `body`). */
+export type RepurposedOutput = z.infer<typeof outputSchema>;
+
+/**
+ * Render a structured script as plain text: each section becomes a `## title`
+ * heading line followed by one line per turn; sections are separated by a
+ * blank line. Speaker information is not included.
+ */
+function scriptToText(script: StructuredScript): string {
+  const blocks: string[] = [];
+  for (const section of script.sections) {
+    const heading = `## ${section.title}\n`;
+    const lines = section.turns.map((turn) => turn.text);
+    blocks.push(heading + lines.join("\n"));
+  }
+  return blocks.join("\n\n");
+}
+
+/**
+ * Ask the script model to rewrite an episode into another content format.
+ *
+ * Only the first 9000 characters of the plain-text transcript are sent. The
+ * reply is requested as a JSON object and validated against `outputSchema`.
+ *
+ * @param script Structured script of the episode.
+ * @param format Target format; selects the instruction prompt.
+ * @returns The validated `{ title, body }` plus prompt/completion token counts
+ *   (0 when the response carries no usage data).
+ * @throws If the reply is not valid JSON or does not match the schema.
+ */
+export async function repurposeScript(
+  script: StructuredScript,
+  format: RepurposeFormat
+): Promise<{ content: RepurposedOutput; usage: TokenUsage }> {
+  const transcript = scriptToText(script).slice(0, 9000);
+  const systemPrompt =
+    "You are a content marketer who repurposes podcast episodes into other formats. Return STRICT JSON: { \"title\": string, \"body\": string } where body is markdown.";
+  const userPrompt = `${FORMAT_PROMPTS[format]}\n\nEpisode title: ${script.title}\n\nTranscript:\n${transcript}`;
+
+  const res = await openai().chat.completions.create({
+    model: SCRIPT_MODEL,
+    messages: [
+      { role: "system", content: systemPrompt },
+      { role: "user", content: userPrompt },
+    ],
+    response_format: { type: "json_object" },
+    temperature: 0.7,
+  });
+
+  // A missing message body parses as "{}" and is then rejected by the schema.
+  const raw = res.choices[0]?.message?.content ?? "{}";
+  const content = outputSchema.parse(JSON.parse(raw));
+  const usage: TokenUsage = {
+    inputTokens: res.usage?.prompt_tokens ?? 0,
+    outputTokens: res.usage?.completion_tokens ?? 0,
+  };
+  return { content, usage };
+}
@@ -0,0 +1,110 @@
+import type { DialogueTurn, ScriptSection, StructuredScript } from "../types";
+
+/** A run of consecutive dialogue turns that is synthesized as one request. */
+export interface AudioSegment {
+  /** Turns in this segment, in script order. */
+  turns: DialogueTurn[];
+  /** Sum of the text lengths of `turns`. */
+  characters: number;
+  /** Distinct voices used in this segment (drives speech vs dialogue choice). */
+  uniqueVoices: number;
+}
+
+/**
+ * Flatten every section of a script into one ordered list of voiced turns.
+ *
+ * Turn text is trimmed and turns that end up empty are dropped. A turn's voice
+ * is looked up by `speakerKey` in `voiceMap`; speakers without an entry get
+ * `fallbackVoiceId`.
+ */
+export function flattenTurns(
+  script: StructuredScript,
+  voiceMap: Record<string, string>,
+  fallbackVoiceId: string
+): DialogueTurn[] {
+  return script.sections.flatMap((section) =>
+    section.turns
+      .map((turn) => ({
+        text: turn.text.trim(),
+        voiceId: voiceMap[turn.speakerKey] ?? fallbackVoiceId,
+      }))
+      .filter((voiced) => voiced.text.length > 0)
+  );
+}
+
+/**
+ * Hard-wrap one over-long run of text into pieces of at most `limit` characters.
+ * Breaks at the last whitespace that fits; a word longer than `limit` is cut at
+ * the limit, backing off one unit rather than splitting a surrogate pair.
+ */
+function hardWrapOnWhitespace(text: string, limit: number): string[] {
+  const whitespace = /\s/;
+  const pieces: string[] = [];
+  let rest = text.trim();
+  while (rest.length > limit) {
+    let cut = -1;
+    // A break at index i yields a piece of length i, so i may equal `limit`.
+    for (let i = limit; i > 0; i--) {
+      if (whitespace.test(rest.charAt(i))) {
+        cut = i;
+        break;
+      }
+    }
+    if (cut === -1) {
+      cut = limit;
+      const lastUnit = rest.charCodeAt(cut - 1);
+      if (cut > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
+    }
+    pieces.push(rest.slice(0, cut).trimEnd());
+    rest = rest.slice(cut).trimStart();
+  }
+  if (rest) pieces.push(rest);
+  return pieces;
+}
+
+/**
+ * Split text longer than `maxChars` into trimmed parts of at most `maxChars`
+ * characters each, preferring sentence boundaries.
+ *
+ * Sentences are packed greedily into each part. A single sentence that is
+ * still too long is hard-wrapped on whitespace (and only cut inside a word when
+ * the word itself exceeds the limit). Punctuation that precedes a sentence
+ * (e.g. a leading "...") is kept with it.
+ *
+ * @param text Text to split. Returned unchanged (untrimmed) as the only part
+ *   when it already fits.
+ * @param maxChars Maximum length of each part; fractional values are floored.
+ * @returns Non-empty, trimmed parts in original order.
+ * @throws RangeError when splitting is required but `maxChars` is below 1
+ *   (or NaN), which could never terminate.
+ */
+export function splitLongText(text: string, maxChars: number): string[] {
+  if (text.length <= maxChars) return [text];
+
+  const limit = Math.floor(maxChars);
+  if (!(limit >= 1)) {
+    throw new RangeError(`maxChars must be at least 1 (received ${maxChars})`);
+  }
+
+  // Sentence = optional leading punctuation, body, closing punctuation, trailing
+  // whitespace. The second alternative catches a punctuation-only tail.
+  const sentences = text.match(/[.!?]*[^.!?]+[.!?]*\s*|[.!?]+\s*/g) ?? [text];
+  const parts: string[] = [];
+  let current = "";
+
+  const flush = () => {
+    const piece = current.trim();
+    if (piece) parts.push(piece);
+    current = "";
+  };
+
+  for (const sentence of sentences) {
+    const candidate = current + sentence;
+    // Parts are emitted trimmed, so measure the trimmed length.
+    if (candidate.trim().length <= limit) {
+      current = candidate;
+      continue;
+    }
+    flush();
+    if (sentence.trim().length > limit) {
+      parts.push(...hardWrapOnWhitespace(sentence, limit));
+    } else {
+      current = sentence;
+    }
+  }
+  flush();
+  return parts;
+}
+
+/**
+ * Group dialogue turns into segments each within `maxChars`. Turns longer than
+ * the limit are split (preserving their voice). Each segment is later sent to
+ * ElevenLabs as one request, then all segment MP3s are stitched together.
+ *
+ * The budget counts one separator character between consecutive turns: a
+ * single-voice segment is synthesized from its turn texts joined with a space,
+ * so the joined request text must also stay within `maxChars`.
+ *
+ * @param turns Voiced turns in script order.
+ * @param maxChars Maximum characters the provider accepts per request.
+ * @returns Segments in order; `characters` is the sum of the turns' text
+ *   lengths (separators excluded).
+ */
+export function segmentTurns(turns: DialogueTurn[], maxChars: number): AudioSegment[] {
+  // First expand any oversized turns into multiple sub-turns.
+  const expanded: DialogueTurn[] = [];
+  for (const turn of turns) {
+    for (const piece of splitLongText(turn.text, maxChars)) {
+      expanded.push({ text: piece, voiceId: turn.voiceId });
+    }
+  }
+
+  const segments: AudioSegment[] = [];
+  let bucket: DialogueTurn[] = [];
+  let chars = 0;
+
+  const flush = () => {
+    if (bucket.length === 0) return;
+    segments.push({
+      turns: bucket,
+      characters: chars,
+      uniqueVoices: new Set(bucket.map((t) => t.voiceId)).size,
+    });
+    bucket = [];
+    chars = 0;
+  };
+
+  for (const turn of expanded) {
+    // Appending to a bucket of k turns yields k separators in the joined text.
+    const joinedLength = chars + turn.text.length + bucket.length;
+    if (bucket.length > 0 && joinedLength > maxChars) flush();
+    bucket.push(turn);
+    chars += turn.text.length;
+  }
+  flush();
+  return segments;
+}
+
+/**
+ * Convenience wrapper: flatten a script into voiced turns, then group those
+ * turns into audio segments of at most `maxChars`.
+ */
+export function segmentScript(
+  script: StructuredScript,
+  voiceMap: Record<string, string>,
+  fallbackVoiceId: string,
+  maxChars: number
+): AudioSegment[] {
+  const voicedTurns = flattenTurns(script, voiceMap, fallbackVoiceId);
+  return segmentTurns(voicedTurns, maxChars);
+}
+
+/**
+ * Sum the text length of every turn in the given sections (for cost/limit
+ * estimation). Text is counted as-is, without trimming.
+ */
+export function totalCharacters(sections: ScriptSection[]): number {
+  let total = 0;
+  for (const section of sections) {
+    for (const turn of section.turns) {
+      total += turn.text.length;
+    }
+  }
+  return total;
+}
@@ -0,0 +1,60 @@
+import { promises as fs } from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { runFfmpeg, ffprobeDuration } from "../ffmpeg";
+
+/**
+ * Concatenate per-segment MP3 buffers into one normalized episode MP3.
+ *
+ * Every buffer is written to a private temp directory and listed in an ffmpeg
+ * concat-demuxer file. The segments are then re-encoded (not stream-copied) in
+ * a single libmp3lame pass with loudness normalization, which guarantees a
+ * uniform codec/bitrate and avoids the header/timestamp glitches that
+ * `-c copy` concat can produce. The temp directory is always removed.
+ *
+ * @param segments MP3 buffers in playback order.
+ * @returns The stitched MP3 bytes and the duration reported by ffprobe.
+ * @throws Error when `segments` is empty; rejections from ffmpeg or the file
+ *   system propagate.
+ */
+export async function stitchMp3(
+  segments: Buffer[]
+): Promise<{ data: Buffer; durationSec: number | null }> {
+  if (segments.length === 0) throw new Error("No audio segments to stitch");
+
+  const workDir = await fs.mkdtemp(path.join(os.tmpdir(), "podcastyes-"));
+  try {
+    const listLines: string[] = [];
+    for (const [index, segment] of segments.entries()) {
+      const segmentPath = path.join(workDir, `seg_${String(index).padStart(4, "0")}.mp3`);
+      await fs.writeFile(segmentPath, segment);
+
+      // concat demuxer list — forward slashes so it parses on Windows and Linux.
+      const portablePath = segmentPath.split(path.sep).join("/");
+      listLines.push(`file '${portablePath.replace(/'/g, "'\\''")}'`);
+    }
+
+    const listPath = path.join(workDir, "list.txt");
+    await fs.writeFile(listPath, listLines.join("\n"));
+
+    const outPath = path.join(workDir, "episode.mp3");
+    const inputArgs = ["-y", "-f", "concat", "-safe", "0", "-i", listPath];
+    const filterArgs = ["-af", "loudnorm=I=-16:TP=-1.5:LRA=11"];
+    const encodeArgs = ["-c:a", "libmp3lame", "-b:a", "128k", "-ar", "44100"];
+    await runFfmpeg([...inputArgs, ...filterArgs, ...encodeArgs, outPath]);
+
+    const data = await fs.readFile(outPath);
+    const durationSec = await ffprobeDuration(outPath);
+    return { data, durationSec };
+  } finally {
+    await fs.rm(workDir, { recursive: true, force: true }).catch(() => {});
+  }
+}