Initial commit: PodcastYes — AI podcast platform
This commit is contained in:
@@ -0,0 +1,97 @@
|
||||
import type { AudioProvider, DialogueTurn, Voice } from "../types";
|
||||
|
||||
/**
 * Read an environment variable, treating an unset, empty, or whitespace-only
 * value as missing so that the fallback applies.
 *
 * A bare `NAME=` line in a .env file yields "", which `??` would pass through
 * as a model id; `apiKey()` in this file already treats an empty value as unset.
 */
function envOrDefault(name: string, fallback: string): string {
  const value = process.env[name]?.trim();
  return value ? value : fallback;
}

/** Base URL that every ElevenLabs request in this module is built from. */
const API = "https://api.elevenlabs.io/v1";

/** `model_id` sent with single-voice requests; overridable via ELEVENLABS_TTS_MODEL. */
const TTS_MODEL = envOrDefault("ELEVENLABS_TTS_MODEL", "eleven_multilingual_v2");

/** `model_id` sent with dialogue requests; overridable via ELEVENLABS_DIALOGUE_MODEL. */
const DIALOGUE_MODEL = envOrDefault("ELEVENLABS_DIALOGUE_MODEL", "eleven_v3");

/** Value of the `output_format` query parameter on synthesis requests. */
const OUTPUT_FORMAT = "mp3_44100_128";
|
||||
|
||||
/**
 * Fetch the ElevenLabs API key from the environment.
 *
 * @returns The value of `ELEVENLABS_API_KEY`.
 * @throws Error when the variable is unset or empty.
 */
function apiKey(): string {
  const { ELEVENLABS_API_KEY: key } = process.env;
  if (key) return key;
  throw new Error("ELEVENLABS_API_KEY is not set");
}
|
||||
|
||||
/**
 * The fields of a voice record from the `/voices` response that this module
 * reads when building `Voice` objects.
 */
interface ElevenVoice {
  /** Mapped to `Voice.id`. */
  voice_id: string;
  /** Mapped to `Voice.name`. */
  name: string;
  /** Mapped to `Voice.previewUrl` when present. */
  preview_url?: string;
  /** Free-form string labels; `gender`, `accent` and `description` are the keys read here. */
  labels?: { [label: string]: string };
}
|
||||
|
||||
/**
 * AudioProvider backed by the ElevenLabs REST API.
 *
 * Every request authenticates with the key returned by `apiKey()`, so each
 * method throws if ELEVENLABS_API_KEY is not set.
 */
export class ElevenLabsAudioProvider implements AudioProvider {
  // Kept safely under the ~2,000-char dialogue limit to leave headroom.
  readonly maxCharsPerRequest = 1800;

  /**
   * Synthesize a single-voice clip.
   *
   * @param text Text to speak.
   * @param voiceId ElevenLabs voice id; URL-encoded before being placed in the path.
   * @param _opts Accepted but not used by this implementation.
   * @returns The response body as a Buffer and `text.length` as the character count.
   * @throws Error (`ElevenLabs TTS <status>: <body>`) on a non-OK response.
   */
  async synthesizeSpeech(
    text: string,
    voiceId: string,
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    // Encode the id so characters such as "/" or "?" cannot alter the request path.
    const url = `${API}/text-to-speech/${encodeURIComponent(voiceId)}?output_format=${OUTPUT_FORMAT}`;
    const audio = await this.postForAudio(url, "TTS", {
      text,
      model_id: TTS_MODEL,
      voice_settings: { stability: 0.5, similarity_boost: 0.75 },
    });
    return { audio, characters: text.length };
  }

  /**
   * Synthesize a multi-speaker clip from an ordered list of turns.
   *
   * @param turns Dialogue turns; each contributes its `text` and `voiceId`.
   * @param _opts Accepted but not used by this implementation.
   * @returns The response body as a Buffer and the summed `text.length` of all turns.
   * @throws Error (`ElevenLabs dialogue <status>: <body>`) on a non-OK response.
   */
  async synthesizeDialogue(
    turns: DialogueTurn[],
    _opts?: { language?: string }
  ): Promise<{ audio: Buffer; characters: number }> {
    const url = `${API}/text-to-dialogue?output_format=${OUTPUT_FORMAT}`;
    const audio = await this.postForAudio(url, "dialogue", {
      inputs: turns.map((t) => ({ text: t.text, voice_id: t.voiceId })),
      model_id: DIALOGUE_MODEL,
    });
    const characters = turns.reduce((n, t) => n + t.text.length, 0);
    return { audio, characters };
  }

  /**
   * List the voices returned by the `/voices` endpoint.
   *
   * @returns One `Voice` per record; an empty array when the response has no `voices` field.
   * @throws Error (`ElevenLabs voices <status>: <body>`) on a non-OK response.
   */
  async listVoices(): Promise<Voice[]> {
    const res = await fetch(`${API}/voices`, { headers: { "xi-api-key": apiKey() } });
    // Include the response body, matching the error detail of the synthesis calls.
    if (!res.ok) throw new Error(`ElevenLabs voices ${res.status}: ${await safeText(res)}`);
    const data = (await res.json()) as { voices?: ElevenVoice[] };
    return (data.voices ?? []).map((v) => ({
      id: v.voice_id,
      name: v.name,
      gender: normalizeGender(v.labels?.gender),
      accent: v.labels?.accent,
      description: v.labels?.description,
      previewUrl: v.preview_url,
    }));
  }

  /**
   * POST a JSON payload and return the response body as a Buffer.
   *
   * @param url Fully built request URL.
   * @param label Short name used in the error message (`ElevenLabs <label> <status>: ...`).
   * @param payload Value serialized with `JSON.stringify` as the request body.
   */
  private async postForAudio(url: string, label: string, payload: unknown): Promise<Buffer> {
    const res = await fetch(url, {
      method: "POST",
      headers: {
        "xi-api-key": apiKey(),
        "Content-Type": "application/json",
        accept: "audio/mpeg",
      },
      body: JSON.stringify(payload),
    });
    if (!res.ok) throw new Error(`ElevenLabs ${label} ${res.status}: ${await safeText(res)}`);
    return Buffer.from(await res.arrayBuffer());
  }
}
|
||||
|
||||
/**
 * Map a raw gender label onto the `Voice["gender"]` values.
 *
 * "male" and "female" pass through unchanged; any other value, including
 * `undefined`, becomes "neutral". The comparison is exact (case-sensitive).
 */
function normalizeGender(g?: string): Voice["gender"] {
  switch (g) {
    case "male":
    case "female":
      return g;
    default:
      return "neutral";
  }
}
|
||||
|
||||
/**
 * Read a response body as text for use in an error message.
 *
 * @returns The body text, or `res.statusText` if reading the body throws or rejects.
 */
async function safeText(res: Response): Promise<string> {
  let detail: string;
  try {
    detail = await res.text();
  } catch {
    // Reading the body failed; fall back to the status line.
    detail = res.statusText;
  }
  return detail;
}
|
||||
@@ -0,0 +1,21 @@
|
||||
import { OpenAIScriptProvider } from "./openai-script";
|
||||
import { ElevenLabsAudioProvider } from "./elevenlabs-audio";
|
||||
import { OpenAIArtProvider } from "./openai-art";
|
||||
import type { ArtProvider, AudioProvider, ScriptProvider } from "../types";
|
||||
|
||||
/*
 * Registry of the active providers. Each slot starts out as null and holds the
 * provider instance once one has been assigned. To swap a model later, change
 * one line in this file.
 */

/** Slot for the script provider. */
let script: ScriptProvider | null = null;

/** Slot for the audio provider. */
let audio: AudioProvider | null = null;

/** Slot for the cover-art provider. */
let art: ArtProvider | null = null;
|
||||
|
||||
/** Return the shared ScriptProvider, constructing an OpenAIScriptProvider on first use. */
export function scriptProvider(): ScriptProvider {
  if (!script) {
    script = new OpenAIScriptProvider();
  }
  return script;
}
|
||||
|
||||
/** Return the shared AudioProvider, constructing an ElevenLabsAudioProvider on first use. */
export function audioProvider(): AudioProvider {
  if (!audio) {
    audio = new ElevenLabsAudioProvider();
  }
  return audio;
}
|
||||
|
||||
/** Return the shared ArtProvider, constructing an OpenAIArtProvider on first use. */
export function artProvider(): ArtProvider {
  if (!art) {
    art = new OpenAIArtProvider();
  }
  return art;
}
|
||||
@@ -0,0 +1,36 @@
|
||||
import { openai, ART_MODEL } from "../openai";
|
||||
import type { ArtProvider } from "../types";
|
||||
|
||||
/** ArtProvider that generates cover images through the OpenAI client's `images.generate` call. */
export class OpenAIArtProvider implements ArtProvider {
  /** Image model id, taken from `ART_MODEL`. */
  readonly model = ART_MODEL;

  /**
   * Generate a single cover image.
   *
   * @param prompt Text prompt sent to the image model.
   * @param opts Optional settings; `size` defaults to "1024x1024".
   * @returns The base64-decoded image bytes and the response's `revised_prompt`, if any.
   * @throws Error when the first result is missing or has no `b64_json` payload.
   */
  async generateCover(
    prompt: string,
    opts?: { size?: "1024x1024" }
  ): Promise<{ data: Buffer; revisedPrompt?: string }> {
    const response = await openai().images.generate({
      model: this.model,
      prompt,
      n: 1,
      size: opts?.size ?? "1024x1024",
      response_format: "b64_json",
    });

    // Only one image is requested (n: 1), so only the first entry is inspected.
    const [image] = response.data ?? [];
    if (!image?.b64_json) {
      throw new Error("DALL·E returned no image data");
    }

    const data = Buffer.from(image.b64_json, "base64");
    return { data, revisedPrompt: image.revised_prompt };
  }
}
|
||||
|
||||
/**
 * Build a cover-art prompt for an episode topic.
 *
 * @param topic Episode topic; also used as the title when no usable title is given.
 * @param tone Mood/tone description included verbatim in the prompt.
 * @param title Optional episode title. Surrounding whitespace is trimmed; an
 *   omitted, empty, or whitespace-only title falls back to `topic`.
 * @returns The prompt sentences joined with single spaces.
 */
export function buildCoverPrompt(topic: string, tone: string, title?: string): string {
  // `??` alone would let "" or "   " through and produce `titled ""`.
  const heading = title?.trim() || topic;
  const sentences = [
    `Podcast cover art for an episode titled "${heading}".`,
    `Topic: ${topic}. Mood/tone: ${tone}.`,
    "Modern, bold, eye-catching square album-cover style.",
    "Strong focal subject, clean composition, vibrant but tasteful colors.",
    "No text, no words, no letters, no logos.",
  ];
  return sentences.join(" ");
}
|
||||
@@ -0,0 +1,90 @@
|
||||
import { z } from "zod";
|
||||
import { openai, SCRIPT_MODEL } from "../openai";
|
||||
import { buildScriptMessages, buildSectionMessages } from "../prompts/script";
|
||||
import type {
|
||||
EpisodeConfig,
|
||||
ScriptProvider,
|
||||
ScriptSection,
|
||||
StructuredScript,
|
||||
TokenUsage,
|
||||
} from "../types";
|
||||
|
||||
// Shared building block: a string with at least one character.
const nonEmptyString = z.string().min(1);

/** One spoken line: the speaker's key and the text to say. */
const turnSchema = z.object({
  speakerKey: nonEmptyString,
  text: nonEmptyString,
});

/** A titled section holding at least one turn. */
const sectionSchema = z.object({
  id: nonEmptyString,
  title: nonEmptyString,
  turns: z.array(turnSchema).min(1),
});

/** A full script: a title plus at least one section. */
const scriptSchema = z.object({
  title: nonEmptyString,
  sections: z.array(sectionSchema).min(1),
});
|
||||
|
||||
/**
 * Repair speaker keys the model may have invented.
 *
 * Any turn whose `speakerKey` is not one of `config.speakers` is reassigned to
 * the first configured speaker, or to "host" when no speakers are configured.
 * The input script is not mutated; a new script object is returned.
 */
function normalizeSpeakers(script: StructuredScript, config: EpisodeConfig): StructuredScript {
  const knownKeys = new Set(config.speakers.map(({ speakerKey }) => speakerKey));
  const [firstSpeaker] = config.speakers;
  const defaultKey = firstSpeaker?.speakerKey ?? "host";

  const sections = script.sections.map((section) => {
    const turns = section.turns.map((turn) => {
      const speakerKey = knownKeys.has(turn.speakerKey) ? turn.speakerKey : defaultKey;
      return { ...turn, speakerKey };
    });
    return { ...section, turns };
  });

  return { ...script, sections };
}
|
||||
|
||||
/**
 * Convert a completion's usage counters into a `TokenUsage`.
 *
 * @param u Usage object with optional `prompt_tokens` / `completion_tokens`, or undefined.
 * @returns Input and output token counts; a missing counter is reported as 0.
 */
function usageFrom(u: { prompt_tokens?: number; completion_tokens?: number } | undefined): TokenUsage {
  const inputTokens = u?.prompt_tokens ?? 0;
  const outputTokens = u?.completion_tokens ?? 0;
  return { inputTokens, outputTokens };
}
|
||||
|
||||
/** ScriptProvider that writes episode scripts through OpenAI chat completions in JSON mode. */
export class OpenAIScriptProvider implements ScriptProvider {
  /** Chat model id, taken from `SCRIPT_MODEL`. */
  readonly model = SCRIPT_MODEL;

  /**
   * Generate a complete script for an episode.
   *
   * @param config Episode configuration used to build the prompt and to validate speaker keys.
   * @returns The validated script with speaker keys normalized, plus token usage.
   * @throws If the reply is not valid JSON or does not match `scriptSchema`
   *   (a missing reply is treated as `{}` and therefore fails validation).
   */
  async generate(config: EpisodeConfig): Promise<{ script: StructuredScript; usage: TokenUsage }> {
    const completion = await openai().chat.completions.create({
      model: this.model,
      messages: buildScriptMessages(config),
      response_format: { type: "json_object" },
      temperature: 0.8,
    });

    const raw = completion.choices[0]?.message?.content ?? "{}";
    const draft = scriptSchema.parse(JSON.parse(raw));
    const script = normalizeSpeakers(draft, config);
    return { script, usage: usageFrom(completion.usage) };
  }

  /**
   * Regenerate one section of an existing script.
   *
   * @param config Episode configuration used to build the prompt and to validate speaker keys.
   * @param script The current script, passed to the prompt builder.
   * @param sectionId Id of the section to regenerate.
   * @returns The new section plus token usage. The section always carries
   *   `sectionId`, whatever id the model returned, and unknown speaker keys are
   *   replaced by the first configured speaker (or "host" when there are none).
   * @throws If the reply is not valid JSON or does not match `sectionSchema`.
   */
  async regenerateSection(
    config: EpisodeConfig,
    script: StructuredScript,
    sectionId: string
  ): Promise<{ section: ScriptSection; usage: TokenUsage }> {
    const completion = await openai().chat.completions.create({
      model: this.model,
      messages: buildSectionMessages(config, script, sectionId),
      response_format: { type: "json_object" },
      temperature: 0.9,
    });

    const raw = completion.choices[0]?.message?.content ?? "{}";
    const draft = sectionSchema.parse(JSON.parse(raw));

    const knownKeys = new Set(config.speakers.map((s) => s.speakerKey));
    const defaultKey = config.speakers[0]?.speakerKey ?? "host";
    const turns = draft.turns.map((turn) => ({
      ...turn,
      speakerKey: knownKeys.has(turn.speakerKey) ? turn.speakerKey : defaultKey,
    }));

    return {
      section: { ...draft, id: sectionId, turns },
      usage: usageFrom(completion.usage),
    };
  }
}
|
||||
Reference in New Issue
Block a user