// podcastdistributiona/lib/ai/moderation.ts

import { openai } from "./openai";

/**
 * Moderation model id, overridable through `OPENAI_MODERATION_MODEL`.
 *
 * `||` (not `??`) is deliberate: an env var that is set but blank
 * (`OPENAI_MODERATION_MODEL=` in a dotenv file) must fall back to the default
 * rather than send an empty model id, which would make every moderation call
 * error out.
 */
const MODERATION_MODEL =
  process.env.OPENAI_MODERATION_MODEL?.trim() || "omni-moderation-latest";

/** Outcome of a moderation check, as produced by `moderateText`. */
export interface ModerationResult {
  /**
   * Overall verdict: the `flagged` value reported by the moderation API, or
   * `false` when the check was skipped (empty input, no result) or failed.
   */
  flagged: boolean;
  /**
   * OpenAI category keys that tripped, e.g. ["hate", "violence"].
   * Empty when nothing tripped or when no check was actually performed.
   */
  categories: string[];
}

/**
 * Run a piece of text through OpenAI's moderation endpoint.
 *
 * Input is trimmed first; if nothing is left, the text is reported as clean
 * without making an API call. A response with no first result is likewise
 * reported as clean.
 *
 * Fail-open policy: any error thrown by the moderation call (e.g. an outage or
 * exhausted quota) is logged and turned into an unflagged result, so a
 * moderation problem never blocks the product. The verdict is advisory only;
 * callers choose whether to reject input or flag generated output.
 *
 * @param input - Text to screen.
 * @returns The overall verdict plus the keys of every category that tripped.
 */
export async function moderateText(input: string): Promise<ModerationResult> {
  const trimmed = input.trim();
  if (trimmed.length === 0) {
    return { flagged: false, categories: [] };
  }

  try {
    const response = await openai().moderations.create({
      model: MODERATION_MODEL,
      input: trimmed,
    });

    const first = response.results[0];
    if (!first) {
      return { flagged: false, categories: [] };
    }

    // Keep only the category keys whose value is truthy.
    const trippedKeys: string[] = [];
    for (const [name, hit] of Object.entries(first.categories)) {
      if (hit) {
        trippedKeys.push(name);
      }
    }

    return { flagged: first.flagged, categories: trippedKeys };
  } catch (err) {
    // Deliberate best-effort: log and report the text as clean.
    console.error("[moderation] check failed — failing open", err);
    return { flagged: false, categories: [] };
  }
}

/**
 * Build a short, human-readable explanation for a content flag.
 *
 * Lists the tripped categories separated by ", "; when the result carries no
 * categories, the generic phrase "policy violation" is used instead.
 *
 * @param result - Moderation result to describe.
 * @returns A one-line reason string.
 */
export function moderationReason(result: ModerationResult): string {
  const { categories } = result;
  const detail = categories.length > 0 ? categories.join(", ") : "policy violation";
  return "Automated moderation flagged: " + detail;
}